1 {-|
    2 Convert account data in CSV format (eg downloaded from a bank) to journal
    3 format, and print it on stdout. See the manual for more details.
    4 -}
    5 
    6 module Hledger.Cli.Convert where
    7 import Control.Monad (when, guard, liftM)
    8 import Data.Maybe
    9 import Data.Time.Format (parseTime)
   10 import Safe
   11 import System.Directory (doesFileExist)
   12 import System.Exit (exitFailure)
   13 import System.FilePath (takeBaseName, replaceExtension)
   14 import System.IO (stderr)
   15 import System.Locale (defaultTimeLocale)
   16 import Test.HUnit
   17 import Text.CSV (parseCSV, parseCSVFromFile, printCSV, CSV)
   18 import Text.ParserCombinators.Parsec
   19 import Text.Printf (hPrintf)
   20 
   21 import Prelude hiding (getContents)
   22 import Hledger.Utils.UTF8 (getContents)
   23 import Hledger
   24 import Hledger.Cli.Format
   25 import qualified Hledger.Cli.Format as Format
   26 import Hledger.Cli.Version
   27 import Hledger.Cli.Options
   28 
   29 {- |
   30 A set of data definitions and account-matching patterns sufficient to
   31 convert a particular CSV data file into meaningful journal transactions. See above.
   32 -}
   33 data CsvRules = CsvRules {
   34       dateField :: Maybe FieldPosition,
   35       dateFormat :: Maybe String,
   36       statusField :: Maybe FieldPosition,
   37       codeField :: Maybe FieldPosition,
   38       descriptionField :: [FormatString],
   39       amountField :: Maybe FieldPosition,
   40       inField :: Maybe FieldPosition,
   41       outField :: Maybe FieldPosition,
   42       currencyField :: Maybe FieldPosition,
   43       baseCurrency :: Maybe String,
   44       accountField :: Maybe FieldPosition,
   45       account2Field :: Maybe FieldPosition,
   46       effectiveDateField :: Maybe FieldPosition,
   47       baseAccount :: AccountName,
   48       accountRules :: [AccountRule]
   49 } deriving (Show, Eq)
   50 
   51 nullrules = CsvRules {
   52       dateField=Nothing,
   53       dateFormat=Nothing,
   54       statusField=Nothing,
   55       codeField=Nothing,
   56       descriptionField=[],
   57       amountField=Nothing,
   58       inField=Nothing,
   59       outField=Nothing,
   60       currencyField=Nothing,
   61       baseCurrency=Nothing,
   62       accountField=Nothing,
   63       account2Field=Nothing,
   64       effectiveDateField=Nothing,
   65       baseAccount="unknown",
   66       accountRules=[]
   67 }
   68 
   69 type FieldPosition = Int
   70 
   71 type AccountRule = (
   72    [(String, Maybe String)] -- list of regex match patterns with optional replacements
   73   ,AccountName              -- account name to use for a transaction matching this rule
   74   )
   75 
   76 type CsvRecord = [String]
   77 
   78 
   79 -- | Read the CSV file named as an argument and print equivalent journal transactions,
   80 -- using/creating a .rules file.
   81 convert :: CliOpts -> IO ()
   82 convert opts = do
   83   let csvfile = case headDef "" $ patterns_ $ reportopts_ opts of
   84                   "" -> "-"
   85                   s -> s
   86       usingStdin = csvfile == "-"
   87       rulesFileSpecified = isJust $ rules_file_ opts
   88       rulesfile = rulesFileFor opts csvfile
   89   when (usingStdin && (not rulesFileSpecified)) $ error' "please use --rules to specify a rules file when converting stdin"
   90   csvparse <- parseCsv csvfile
   91   let records = case csvparse of
   92                   Left e -> error' $ show e
   93                   Right rs -> reverse $ filter (/= [""]) rs
   94   exists <- doesFileExist rulesfile
   95   if (not exists) then do
   96                   hPrintf stderr "creating conversion rules file %s, edit this file for better results\n" rulesfile
   97                   writeFile rulesfile initialRulesFileContent
   98    else
   99       hPrintf stderr "using conversion rules file %s\n" rulesfile
  100   rules <- liftM (either (error'.show) id) $ parseCsvRulesFile rulesfile
  101   let invalid = validateRules rules
  102   when (debug_ opts) $ hPrintf stderr "rules: %s\n" (show rules)
  103   when (isJust invalid) $ error (fromJust invalid)
  104   let requiredfields = max 2 (maxFieldIndex rules + 1)
  105       badrecords = take 1 $ filter ((< requiredfields).length) records
  106   if null badrecords
  107    then mapM_ (printTxn (debug_ opts) rules) records
  108    else do
  109      hPrintf stderr (unlines [
  110                       "Warning, at least one CSV record does not contain a field referenced by the"
  111                      ,"conversion rules file, or has less than two fields. Are you converting a"
  112                      ,"valid CSV file ? First bad record:\n%s"
  113                      ]) (show $ head badrecords)
  114      exitFailure
  115 
  116 parseCsv :: FilePath -> IO (Either ParseError CSV)
  117 parseCsv path =
  118   case path of
  119     "-" -> liftM (parseCSV "(stdin)") getContents
  120     p   -> parseCSVFromFile p
  121 
  122 -- | The highest (0-based) field index referenced in the field
  123 -- definitions, or -1 if no fields are defined.
  124 maxFieldIndex :: CsvRules -> Int
  125 maxFieldIndex r = maximumDef (-1) $ catMaybes [
  126                    dateField r
  127                   ,statusField r
  128                   ,codeField r
  129                   ,amountField r
  130                   ,inField r
  131                   ,outField r
  132                   ,currencyField r
  133                   ,accountField r
  134                   ,account2Field r
  135                   ,effectiveDateField r
  136                   ]
  137 
  138 rulesFileFor :: CliOpts -> FilePath -> FilePath
  139 rulesFileFor CliOpts{rules_file_=Just f} _ = f
  140 rulesFileFor CliOpts{rules_file_=Nothing} csvfile = replaceExtension csvfile ".rules"
  141 
  142 initialRulesFileContent :: String
  143 initialRulesFileContent =
  144     "# csv conversion rules file generated by "++(progversionstr progname)++"\n" ++
  145     "# Add rules to this file for more accurate conversion, see\n"++
  146     "# http://hledger.org/MANUAL.html#convert\n" ++
  147     "\n" ++
  148     "base-account assets:bank:checking\n" ++
  149     "date-field 0\n" ++
  150     "description-field 4\n" ++
  151     "amount-field 1\n" ++
  152     "currency $\n" ++
  153     "\n" ++
  154     "# account-assigning rules\n" ++
  155     "\n" ++
  156     "SPECTRUM\n" ++
  157     "expenses:health:gym\n" ++
  158     "\n" ++
  159     "ITUNES\n" ++
  160     "BLKBSTR=BLOCKBUSTER\n" ++
  161     "expenses:entertainment\n" ++
  162     "\n" ++
  163     "(TO|FROM) SAVINGS\n" ++
  164     "assets:bank:savings\n"
  165 
  166 validateRules :: CsvRules -> Maybe String
  167 validateRules rules = let
  168     hasAmount = isJust $ amountField rules
  169     hasIn = isJust $ inField rules
  170     hasOut = isJust $ outField rules
  171   in case (hasAmount, hasIn, hasOut) of
  172     (True, True, _) -> Just "Don't specify in-field when specifying amount-field"
  173     (True, _, True) -> Just "Don't specify out-field when specifying amount-field"
  174     (_, False, True) -> Just "Please specify in-field when specifying out-field"
  175     (_, True, False) -> Just "Please specify out-field when specifying in-field"
  176     (False, False, False) -> Just "Please specify either amount-field, or in-field and out-field"
  177     _ -> Nothing
  178 
  179 -- rules file parser
  180 
  181 parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules)
  182 parseCsvRulesFile f = do
  183   s <- readFile f
  184   return $ parseCsvRules f s
  185 
  186 parseCsvRules :: FilePath -> String -> Either ParseError CsvRules
  187 parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
  188 
  189 csvrulesfile :: GenParser Char CsvRules CsvRules
  190 csvrulesfile = do
  191   many blankorcommentline
  192   many definitions
  193   r <- getState
  194   ars <- many accountrule
  195   many blankorcommentline
  196   eof
  197   return r{accountRules=ars}
  198 
  199 definitions :: GenParser Char CsvRules ()
  200 definitions = do
  201   choice' [
  202     datefield
  203    ,dateformat
  204    ,statusfield
  205    ,codefield
  206    ,descriptionfield
  207    ,amountfield
  208    ,infield
  209    ,outfield
  210    ,currencyfield
  211    ,accountfield
  212    ,account2field
  213    ,effectivedatefield
  214    ,basecurrency
  215    ,baseaccount
  216    ,commentline
  217    ] <?> "definition"
  218   return ()
  219 
  220 datefield = do
  221   string "date-field"
  222   many1 spacenonewline
  223   v <- restofline
  224   updateState (\r -> r{dateField=readMay v})
  225 
  226 effectivedatefield = do
  227   string "effective-date-field"
  228   many1 spacenonewline
  229   v <- restofline
  230   updateState (\r -> r{effectiveDateField=readMay v})
  231 
  232 dateformat = do
  233   string "date-format"
  234   many1 spacenonewline
  235   v <- restofline
  236   updateState (\r -> r{dateFormat=Just v})
  237 
  238 codefield = do
  239   string "code-field"
  240   many1 spacenonewline
  241   v <- restofline
  242   updateState (\r -> r{codeField=readMay v})
  243 
  244 statusfield = do
  245   string "status-field"
  246   many1 spacenonewline
  247   v <- restofline
  248   updateState (\r -> r{statusField=readMay v})
  249 
  250 descriptionFieldValue :: GenParser Char st [FormatString]
  251 descriptionFieldValue = do
  252 --      try (fieldNo <* spacenonewline)
  253       try fieldNo
  254   <|> formatStrings
  255   where
  256     fieldNo = many1 digit >>= \x -> return [FormatField False Nothing Nothing $ FieldNo $ read x]
  257 
  258 descriptionfield = do
  259   string "description-field"
  260   many1 spacenonewline
  261   formatS <- descriptionFieldValue
  262   restofline
  263   updateState (\x -> x{descriptionField=formatS})
  264 
  265 amountfield = do
  266   string "amount-field"
  267   many1 spacenonewline
  268   v <- restofline
  269   x <- updateState (\r -> r{amountField=readMay v})
  270   return x
  271 
  272 infield = do
  273   string "in-field"
  274   many1 spacenonewline
  275   v <- restofline
  276   updateState (\r -> r{inField=readMay v})
  277 
  278 outfield = do
  279   string "out-field"
  280   many1 spacenonewline
  281   v <- restofline
  282   updateState (\r -> r{outField=readMay v})
  283 
  284 currencyfield = do
  285   string "currency-field"
  286   many1 spacenonewline
  287   v <- restofline
  288   updateState (\r -> r{currencyField=readMay v})
  289 
  290 accountfield = do
  291   string "account-field"
  292   many1 spacenonewline
  293   v <- restofline
  294   updateState (\r -> r{accountField=readMay v})
  295 
  296 account2field = do
  297   string "account2-field"
  298   many1 spacenonewline
  299   v <- restofline
  300   updateState (\r -> r{account2Field=readMay v})
  301 
  302 basecurrency = do
  303   string "currency"
  304   many1 spacenonewline
  305   v <- restofline
  306   updateState (\r -> r{baseCurrency=Just v})
  307 
  308 baseaccount = do
  309   string "base-account"
  310   many1 spacenonewline
  311   v <- ledgeraccountname
  312   optional newline
  313   updateState (\r -> r{baseAccount=v})
  314 
  315 accountrule :: GenParser Char CsvRules AccountRule
  316 accountrule = do
  317   many blankorcommentline
  318   pats <- many1 matchreplacepattern
  319   guard $ length pats >= 2
  320   let pats' = init pats
  321       acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats
  322   many blankorcommentline
  323   return (pats',acct)
  324  <?> "account rule"
  325 
  326 blanklines = many1 blankline
  327 
  328 blankline = many spacenonewline >> newline >> return () <?> "blank line"
  329 
  330 commentchar = oneOf ";#"
  331 
  332 commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
  333 
  334 blankorcommentline = choice' [blankline, commentline]
  335 
  336 matchreplacepattern = do
  337   notFollowedBy commentchar
  338   matchpat <- many1 (noneOf "=\n")
  339   replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
  340   newline
  341   return (matchpat,replpat)
  342 
  343 printTxn :: Bool -> CsvRules -> CsvRecord -> IO ()
  344 printTxn debug rules rec = do
  345   when debug $ hPrintf stderr "record: %s" (printCSV [rec])
  346   putStr $ show $ transactionFromCsvRecord rules rec
  347 
  348 -- csv record conversion
  349 formatD :: CsvRecord -> Bool -> Maybe Int -> Maybe Int -> Field -> String
  350 formatD record leftJustified min max f = case f of 
  351   FieldNo n       -> maybe "" show $ atMay record n
  352   -- Some of these might in theory in read from fields
  353   Format.Account  -> ""
  354   DepthSpacer     -> ""
  355   Total           -> ""
  356   DefaultDate     -> ""
  357   Description     -> ""
  358  where
  359    show = formatValue leftJustified min max
  360 
  361 formatDescription :: CsvRecord -> [FormatString] -> String
  362 formatDescription _ [] = ""
  363 formatDescription record (f:fs) = s ++ (formatDescription record fs)
  364   where s = case f of
  365                 FormatLiteral l -> l
  366                 FormatField leftJustified min max field  -> formatD record leftJustified min max field
  367 
  368 transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction
  369 transactionFromCsvRecord rules fields =
  370   let 
  371       date = parsedate $ normaliseDate (dateFormat rules) $ maybe "1900/1/1" (atDef "" fields) (dateField rules)
  372       effectivedate = do idx <- effectiveDateField rules
  373                          return $ parsedate $ normaliseDate (dateFormat rules) $ (atDef "" fields) idx
  374       status = maybe False (null . strip . (atDef "" fields)) (statusField rules)
  375       code = maybe "" (atDef "" fields) (codeField rules)
  376       desc = formatDescription fields (descriptionField rules)
  377       comment = ""
  378       precomment = ""
  379       baseacc = maybe (baseAccount rules) (atDef "" fields) (accountField rules)
  380       amountstr = getAmount rules fields
  381       amountstr' = strnegate amountstr where strnegate ('-':s) = s
  382                                              strnegate s = '-':s
  383       currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules)
  384       amountstr'' = currency ++ amountstr'
  385       amountparse = runParser someamount nullctx "" amountstr''
  386       amount = either (const nullmixedamt) id amountparse
  387       -- Using costOfMixedAmount here to allow complex costs like "10 GBP @@ 15 USD".
  388       -- Aim is to have "10 GBP @@ 15 USD" applied to account "acct", but have "-15USD" applied to "baseacct"
  389       baseamount = costOfMixedAmount amount
  390       unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown"
  391                   | otherwise = "expenses:unknown"
  392       (acct',newdesc) = identify (accountRules rules) unknownacct desc
  393       acct = maybe acct' (atDef "" fields) (account2Field rules)
  394       t = Transaction {
  395               tdate=date,
  396               teffectivedate=effectivedate,
  397               tstatus=status,
  398               tcode=code,
  399               tdescription=newdesc,
  400               tcomment=comment,
  401               tpreceding_comment_lines=precomment,
  402               tmetadata=[],
  403               tpostings=[
  404                    Posting {
  405                      pstatus=False,
  406                      paccount=acct,
  407                      pamount=amount,
  408                      pcomment="",
  409                      ptype=RegularPosting,
  410                      pmetadata=[],
  411                      ptransaction=Just t
  412                    },
  413                    Posting {
  414                      pstatus=False,
  415                      paccount=baseacc,
  416                      pamount=(-baseamount),
  417                      pcomment="",
  418                      ptype=RegularPosting,
  419                      pmetadata=[],
  420                      ptransaction=Just t
  421                    }
  422                   ]
  423             }
  424   in t
  425 
  426 -- | Convert some date string with unknown format to YYYY/MM/DD.
  427 normaliseDate :: Maybe String -- ^ User-supplied date format: this should be tried in preference to all others
  428               -> String -> String
  429 normaliseDate mb_user_format s = maybe "0000/00/00" showDate $
  430               firstJust $
  431               (maybe id (\user_format -> (parseTime defaultTimeLocale user_format s :)) mb_user_format) $
  432               [parseTime defaultTimeLocale "%Y/%m/%e" s
  433                -- can't parse a month without leading 0, try adding one
  434               ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
  435               ,parseTime defaultTimeLocale "%Y-%m-%e" s
  436               ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
  437               ,parseTime defaultTimeLocale "%m/%e/%Y" s
  438               ,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s)
  439               ,parseTime defaultTimeLocale "%m-%e-%Y" s
  440               ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
  441               ]
  442 
  443 -- | Apply account matching rules to a transaction description to obtain
  444 -- the most appropriate account and a new description.
  445 identify :: [AccountRule] -> String -> String -> (String,String)
  446 identify rules defacct desc | null matchingrules = (defacct,desc)
  447                             | otherwise = (acct,newdesc)
  448     where
  449       matchingrules = filter ismatch rules :: [AccountRule]
  450           where ismatch = any ((`regexMatchesCI` desc) . fst) . fst
  451       (prs,acct) = head matchingrules
  452       p_ms_r = filter (\(_,m,_) -> m) $ map (\(p,r) -> (p, p `regexMatchesCI` desc, r)) prs
  453       (p,_,r) = head p_ms_r
  454       newdesc = case r of Just repl -> regexReplaceCI p repl desc
  455                           Nothing   -> desc
  456 
  457 caseinsensitive = ("(?i)"++)
  458 
  459 getAmount :: CsvRules -> CsvRecord -> String
  460 getAmount rules fields = case amountField rules of
  461   Just f  -> maybe "" (atDef "" fields) $ Just f
  462   Nothing ->
  463     case (c, d) of
  464       (x, "") -> x
  465       ("", x) -> "-"++x
  466       _ -> ""
  467     where
  468       c = maybe "" (atDef "" fields) (inField rules)
  469       d = maybe "" (atDef "" fields) (outField rules)
  470 
  471 tests_Hledger_Cli_Convert = TestList (test_parser ++ test_description_parsing)
  472 
  473 test_description_parsing = [
  474       "description-field 1" ~: assertParseDescription "description-field 1\n" [FormatField False Nothing Nothing (FieldNo 1)]
  475     , "description-field 1 " ~: assertParseDescription "description-field 1 \n" [FormatField False Nothing Nothing (FieldNo 1)]
  476     , "description-field %(1)" ~: assertParseDescription "description-field %(1)\n" [FormatField False Nothing Nothing (FieldNo 1)]
  477     , "description-field %(1)/$(2)" ~: assertParseDescription "description-field %(1)/%(2)\n" [
  478           FormatField False Nothing Nothing (FieldNo 1)
  479         , FormatLiteral "/"
  480         , FormatField False Nothing Nothing (FieldNo 2)
  481         ]
  482     ]
  483   where
  484     assertParseDescription string expected = do assertParseEqual (parseDescription string) (nullrules {descriptionField = expected})
  485     parseDescription :: String -> Either ParseError CsvRules
  486     parseDescription x = runParser descriptionfieldWrapper nullrules "(unknown)" x
  487     descriptionfieldWrapper :: GenParser Char CsvRules CsvRules
  488     descriptionfieldWrapper = do
  489       descriptionfield
  490       r <- getState
  491       return r
  492 
  493 test_parser =  [
  494 
  495    "convert rules parsing: empty file" ~: do
  496      -- let assertMixedAmountParse parseresult mixedamount =
  497      --         (either (const "parse error") showMixedAmountDebug parseresult) ~?= (showMixedAmountDebug mixedamount)
  498     assertParseEqual (parseCsvRules "unknown" "") nullrules
  499 
  500   ,"convert rules parsing: accountrule" ~: do
  501      assertParseEqual (parseWithCtx nullrules accountrule "A\na\n") -- leading blank line required
  502                  ([("A",Nothing)], "a")
  503 
  504   ,"convert rules parsing: trailing comments" ~: do
  505      assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n")
  506 
  507   ,"convert rules parsing: trailing blank lines" ~: do
  508      assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n  \n")
  509 
  510   -- not supported
  511   -- ,"convert rules parsing: no final newline" ~: do
  512   --    assertParse (parseWithCtx nullrules csvrulesfile "A\na")
  513   --    assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#")
  514   --    assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n  ")
  515 
  516                  -- (nullrules{
  517                  --   -- dateField=Maybe FieldPosition,
  518                  --   -- statusField=Maybe FieldPosition,
  519                  --   -- codeField=Maybe FieldPosition,
  520                  --   -- descriptionField=Maybe FieldPosition,
  521                  --   -- amountField=Maybe FieldPosition,
  522                  --   -- currencyField=Maybe FieldPosition,
  523                  --   -- baseCurrency=Maybe String,
  524                  --   -- baseAccount=AccountName,
  525                  --   accountRules=[
  526                  --        ([("A",Nothing)], "a")
  527                  --       ]
  528                  --  })
  529 
  530   ]