1 {-| 2 Convert account data in CSV format (eg downloaded from a bank) to journal 3 format, and print it on stdout. See the manual for more details. 4 -} 5 6 module Hledger.Cli.Convert where 7 import Control.Monad (when, guard, liftM) 8 import Data.Maybe 9 import Data.Time.Format (parseTime) 10 import Safe 11 import System.Directory (doesFileExist) 12 import System.Exit (exitFailure) 13 import System.FilePath (takeBaseName, replaceExtension) 14 import System.IO (stderr) 15 import System.Locale (defaultTimeLocale) 16 import Test.HUnit 17 import Text.CSV (parseCSV, parseCSVFromFile, printCSV, CSV) 18 import Text.ParserCombinators.Parsec 19 import Text.Printf (hPrintf) 20 21 import Prelude hiding (getContents) 22 import Hledger.Utils.UTF8 (getContents) 23 import Hledger 24 import Hledger.Cli.Format 25 import qualified Hledger.Cli.Format as Format 26 import Hledger.Cli.Version 27 import Hledger.Cli.Options 28 29 {- | 30 A set of data definitions and account-matching patterns sufficient to 31 convert a particular CSV data file into meaningful journal transactions. See above. 32 -} 33 data CsvRules = CsvRules { 34 dateField :: Maybe FieldPosition, 35 dateFormat :: Maybe String, 36 statusField :: Maybe FieldPosition, 37 codeField :: Maybe FieldPosition, 38 descriptionField :: [FormatString], 39 amountField :: Maybe FieldPosition, 40 inField :: Maybe FieldPosition, 41 outField :: Maybe FieldPosition, 42 currencyField :: Maybe FieldPosition, 43 baseCurrency :: Maybe String, 44 accountField :: Maybe FieldPosition, 45 account2Field :: Maybe FieldPosition, 46 effectiveDateField :: Maybe FieldPosition, 47 baseAccount :: AccountName, 48 accountRules :: [AccountRule] 49 } deriving (Show, Eq) 50 51 nullrules = CsvRules { 52 dateField=Nothing, 53 dateFormat=Nothing, 54 statusField=Nothing, 55 codeField=Nothing, 56 descriptionField=[], 57 amountField=Nothing, 58 inField=Nothing, 59 outField=Nothing, 60 currencyField=Nothing, 61 baseCurrency=Nothing, 62 accountField=Nothing, 63 account2Field=Nothing, 64 effectiveDateField=Nothing, 65 baseAccount="unknown", 66 accountRules=[] 67 } 68 69 type FieldPosition = Int 70 71 type AccountRule = ( 72 [(String, Maybe String)] -- list of regex match patterns with optional replacements 73 ,AccountName -- account name to use for a transaction matching this rule 74 ) 75 76 type CsvRecord = [String] 77 78 79 -- | Read the CSV file named as an argument and print equivalent journal transactions, 80 -- using/creating a .rules file. 81 convert :: CliOpts -> IO () 82 convert opts = do 83 let csvfile = case headDef "" $ patterns_ $ reportopts_ opts of 84 "" -> "-" 85 s -> s 86 usingStdin = csvfile == "-" 87 rulesFileSpecified = isJust $ rules_file_ opts 88 rulesfile = rulesFileFor opts csvfile 89 when (usingStdin && (not rulesFileSpecified)) $ error' "please use --rules to specify a rules file when converting stdin" 90 csvparse <- parseCsv csvfile 91 let records = case csvparse of 92 Left e -> error' $ show e 93 Right rs -> reverse $ filter (/= [""]) rs 94 exists <- doesFileExist rulesfile 95 if (not exists) then do 96 hPrintf stderr "creating conversion rules file %s, edit this file for better results\n" rulesfile 97 writeFile rulesfile initialRulesFileContent 98 else 99 hPrintf stderr "using conversion rules file %s\n" rulesfile 100 rules <- liftM (either (error'.show) id) $ parseCsvRulesFile rulesfile 101 let invalid = validateRules rules 102 when (debug_ opts) $ hPrintf stderr "rules: %s\n" (show rules) 103 when (isJust invalid) $ error (fromJust invalid) 104 let requiredfields = max 2 (maxFieldIndex rules + 1) 105 badrecords = take 1 $ filter ((< requiredfields).length) records 106 if null badrecords 107 then mapM_ (printTxn (debug_ opts) rules) records 108 else do 109 hPrintf stderr (unlines [ 110 "Warning, at least one CSV record does not contain a field referenced by the" 111 ,"conversion rules file, or has less than two fields. Are you converting a" 112 ,"valid CSV file ? First bad record:\n%s" 113 ]) (show $ head badrecords) 114 exitFailure 115 116 parseCsv :: FilePath -> IO (Either ParseError CSV) 117 parseCsv path = 118 case path of 119 "-" -> liftM (parseCSV "(stdin)") getContents 120 p -> parseCSVFromFile p 121 122 -- | The highest (0-based) field index referenced in the field 123 -- definitions, or -1 if no fields are defined. 124 maxFieldIndex :: CsvRules -> Int 125 maxFieldIndex r = maximumDef (-1) $ catMaybes [ 126 dateField r 127 ,statusField r 128 ,codeField r 129 ,amountField r 130 ,inField r 131 ,outField r 132 ,currencyField r 133 ,accountField r 134 ,account2Field r 135 ,effectiveDateField r 136 ] 137 138 rulesFileFor :: CliOpts -> FilePath -> FilePath 139 rulesFileFor CliOpts{rules_file_=Just f} _ = f 140 rulesFileFor CliOpts{rules_file_=Nothing} csvfile = replaceExtension csvfile ".rules" 141 142 initialRulesFileContent :: String 143 initialRulesFileContent = 144 "# csv conversion rules file generated by "++(progversionstr progname)++"\n" ++ 145 "# Add rules to this file for more accurate conversion, see\n"++ 146 "# http://hledger.org/MANUAL.html#convert\n" ++ 147 "\n" ++ 148 "base-account assets:bank:checking\n" ++ 149 "date-field 0\n" ++ 150 "description-field 4\n" ++ 151 "amount-field 1\n" ++ 152 "currency $\n" ++ 153 "\n" ++ 154 "# account-assigning rules\n" ++ 155 "\n" ++ 156 "SPECTRUM\n" ++ 157 "expenses:health:gym\n" ++ 158 "\n" ++ 159 "ITUNES\n" ++ 160 "BLKBSTR=BLOCKBUSTER\n" ++ 161 "expenses:entertainment\n" ++ 162 "\n" ++ 163 "(TO|FROM) SAVINGS\n" ++ 164 "assets:bank:savings\n" 165 166 validateRules :: CsvRules -> Maybe String 167 validateRules rules = let 168 hasAmount = isJust $ amountField rules 169 hasIn = isJust $ inField rules 170 hasOut = isJust $ outField rules 171 in case (hasAmount, hasIn, hasOut) of 172 (True, True, _) -> Just "Don't specify in-field when specifying amount-field" 173 (True, _, True) -> Just "Don't specify out-field when specifying amount-field" 174 (_, False, True) -> Just "Please specify in-field when specifying out-field" 175 (_, True, False) -> Just "Please specify out-field when specifying in-field" 176 (False, False, False) -> Just "Please specify either amount-field, or in-field and out-field" 177 _ -> Nothing 178 179 -- rules file parser 180 181 parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules) 182 parseCsvRulesFile f = do 183 s <- readFile f 184 return $ parseCsvRules f s 185 186 parseCsvRules :: FilePath -> String -> Either ParseError CsvRules 187 parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s 188 189 csvrulesfile :: GenParser Char CsvRules CsvRules 190 csvrulesfile = do 191 many blankorcommentline 192 many definitions 193 r <- getState 194 ars <- many accountrule 195 many blankorcommentline 196 eof 197 return r{accountRules=ars} 198 199 definitions :: GenParser Char CsvRules () 200 definitions = do 201 choice' [ 202 datefield 203 ,dateformat 204 ,statusfield 205 ,codefield 206 ,descriptionfield 207 ,amountfield 208 ,infield 209 ,outfield 210 ,currencyfield 211 ,accountfield 212 ,account2field 213 ,effectivedatefield 214 ,basecurrency 215 ,baseaccount 216 ,commentline 217 ] <?> "definition" 218 return () 219 220 datefield = do 221 string "date-field" 222 many1 spacenonewline 223 v <- restofline 224 updateState (\r -> r{dateField=readMay v}) 225 226 effectivedatefield = do 227 string "effective-date-field" 228 many1 spacenonewline 229 v <- restofline 230 updateState (\r -> r{effectiveDateField=readMay v}) 231 232 dateformat = do 233 string "date-format" 234 many1 spacenonewline 235 v <- restofline 236 updateState (\r -> r{dateFormat=Just v}) 237 238 codefield = do 239 string "code-field" 240 many1 spacenonewline 241 v <- restofline 242 updateState (\r -> r{codeField=readMay v}) 243 244 statusfield = do 245 string "status-field" 246 many1 spacenonewline 247 v <- restofline 248 updateState (\r -> r{statusField=readMay v}) 249 250 descriptionFieldValue :: GenParser Char st [FormatString] 251 descriptionFieldValue = do 252 -- try (fieldNo <* spacenonewline) 253 try fieldNo 254 <|> formatStrings 255 where 256 fieldNo = many1 digit >>= \x -> return [FormatField False Nothing Nothing $ FieldNo $ read x] 257 258 descriptionfield = do 259 string "description-field" 260 many1 spacenonewline 261 formatS <- descriptionFieldValue 262 restofline 263 updateState (\x -> x{descriptionField=formatS}) 264 265 amountfield = do 266 string "amount-field" 267 many1 spacenonewline 268 v <- restofline 269 x <- updateState (\r -> r{amountField=readMay v}) 270 return x 271 272 infield = do 273 string "in-field" 274 many1 spacenonewline 275 v <- restofline 276 updateState (\r -> r{inField=readMay v}) 277 278 outfield = do 279 string "out-field" 280 many1 spacenonewline 281 v <- restofline 282 updateState (\r -> r{outField=readMay v}) 283 284 currencyfield = do 285 string "currency-field" 286 many1 spacenonewline 287 v <- restofline 288 updateState (\r -> r{currencyField=readMay v}) 289 290 accountfield = do 291 string "account-field" 292 many1 spacenonewline 293 v <- restofline 294 updateState (\r -> r{accountField=readMay v}) 295 296 account2field = do 297 string "account2-field" 298 many1 spacenonewline 299 v <- restofline 300 updateState (\r -> r{account2Field=readMay v}) 301 302 basecurrency = do 303 string "currency" 304 many1 spacenonewline 305 v <- restofline 306 updateState (\r -> r{baseCurrency=Just v}) 307 308 baseaccount = do 309 string "base-account" 310 many1 spacenonewline 311 v <- ledgeraccountname 312 optional newline 313 updateState (\r -> r{baseAccount=v}) 314 315 accountrule :: GenParser Char CsvRules AccountRule 316 accountrule = do 317 many blankorcommentline 318 pats <- many1 matchreplacepattern 319 guard $ length pats >= 2 320 let pats' = init pats 321 acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats 322 many blankorcommentline 323 return (pats',acct) 324 <?> "account rule" 325 326 blanklines = many1 blankline 327 328 blankline = many spacenonewline >> newline >> return () <?> "blank line" 329 330 commentchar = oneOf ";#" 331 332 commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line" 333 334 blankorcommentline = choice' [blankline, commentline] 335 336 matchreplacepattern = do 337 notFollowedBy commentchar 338 matchpat <- many1 (noneOf "=\n") 339 replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"} 340 newline 341 return (matchpat,replpat) 342 343 printTxn :: Bool -> CsvRules -> CsvRecord -> IO () 344 printTxn debug rules rec = do 345 when debug $ hPrintf stderr "record: %s" (printCSV [rec]) 346 putStr $ show $ transactionFromCsvRecord rules rec 347 348 -- csv record conversion 349 formatD :: CsvRecord -> Bool -> Maybe Int -> Maybe Int -> Field -> String 350 formatD record leftJustified min max f = case f of 351 FieldNo n -> maybe "" show $ atMay record n 352 -- Some of these might in theory in read from fields 353 Format.Account -> "" 354 DepthSpacer -> "" 355 Total -> "" 356 DefaultDate -> "" 357 Description -> "" 358 where 359 show = formatValue leftJustified min max 360 361 formatDescription :: CsvRecord -> [FormatString] -> String 362 formatDescription _ [] = "" 363 formatDescription record (f:fs) = s ++ (formatDescription record fs) 364 where s = case f of 365 FormatLiteral l -> l 366 FormatField leftJustified min max field -> formatD record leftJustified min max field 367 368 transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction 369 transactionFromCsvRecord rules fields = 370 let 371 date = parsedate $ normaliseDate (dateFormat rules) $ maybe "1900/1/1" (atDef "" fields) (dateField rules) 372 effectivedate = do idx <- effectiveDateField rules 373 return $ parsedate $ normaliseDate (dateFormat rules) $ (atDef "" fields) idx 374 status = maybe False (null . strip . (atDef "" fields)) (statusField rules) 375 code = maybe "" (atDef "" fields) (codeField rules) 376 desc = formatDescription fields (descriptionField rules) 377 comment = "" 378 precomment = "" 379 baseacc = maybe (baseAccount rules) (atDef "" fields) (accountField rules) 380 amountstr = getAmount rules fields 381 amountstr' = strnegate amountstr where strnegate ('-':s) = s 382 strnegate s = '-':s 383 currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules) 384 amountstr'' = currency ++ amountstr' 385 amountparse = runParser someamount nullctx "" amountstr'' 386 amount = either (const nullmixedamt) id amountparse 387 -- Using costOfMixedAmount here to allow complex costs like "10 GBP @@ 15 USD". 388 -- Aim is to have "10 GBP @@ 15 USD" applied to account "acct", but have "-15USD" applied to "baseacct" 389 baseamount = costOfMixedAmount amount 390 unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown" 391 | otherwise = "expenses:unknown" 392 (acct',newdesc) = identify (accountRules rules) unknownacct desc 393 acct = maybe acct' (atDef "" fields) (account2Field rules) 394 t = Transaction { 395 tdate=date, 396 teffectivedate=effectivedate, 397 tstatus=status, 398 tcode=code, 399 tdescription=newdesc, 400 tcomment=comment, 401 tpreceding_comment_lines=precomment, 402 tmetadata=[], 403 tpostings=[ 404 Posting { 405 pstatus=False, 406 paccount=acct, 407 pamount=amount, 408 pcomment="", 409 ptype=RegularPosting, 410 pmetadata=[], 411 ptransaction=Just t 412 }, 413 Posting { 414 pstatus=False, 415 paccount=baseacc, 416 pamount=(-baseamount), 417 pcomment="", 418 ptype=RegularPosting, 419 pmetadata=[], 420 ptransaction=Just t 421 } 422 ] 423 } 424 in t 425 426 -- | Convert some date string with unknown format to YYYY/MM/DD. 427 normaliseDate :: Maybe String -- ^ User-supplied date format: this should be tried in preference to all others 428 -> String -> String 429 normaliseDate mb_user_format s = maybe "0000/00/00" showDate $ 430 firstJust $ 431 (maybe id (\user_format -> (parseTime defaultTimeLocale user_format s :)) mb_user_format) $ 432 [parseTime defaultTimeLocale "%Y/%m/%e" s 433 -- can't parse a month without leading 0, try adding one 434 ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s) 435 ,parseTime defaultTimeLocale "%Y-%m-%e" s 436 ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s) 437 ,parseTime defaultTimeLocale "%m/%e/%Y" s 438 ,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s) 439 ,parseTime defaultTimeLocale "%m-%e-%Y" s 440 ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s) 441 ] 442 443 -- | Apply account matching rules to a transaction description to obtain 444 -- the most appropriate account and a new description. 445 identify :: [AccountRule] -> String -> String -> (String,String) 446 identify rules defacct desc | null matchingrules = (defacct,desc) 447 | otherwise = (acct,newdesc) 448 where 449 matchingrules = filter ismatch rules :: [AccountRule] 450 where ismatch = any ((`regexMatchesCI` desc) . fst) . fst 451 (prs,acct) = head matchingrules 452 p_ms_r = filter (\(_,m,_) -> m) $ map (\(p,r) -> (p, p `regexMatchesCI` desc, r)) prs 453 (p,_,r) = head p_ms_r 454 newdesc = case r of Just repl -> regexReplaceCI p repl desc 455 Nothing -> desc 456 457 caseinsensitive = ("(?i)"++) 458 459 getAmount :: CsvRules -> CsvRecord -> String 460 getAmount rules fields = case amountField rules of 461 Just f -> maybe "" (atDef "" fields) $ Just f 462 Nothing -> 463 case (c, d) of 464 (x, "") -> x 465 ("", x) -> "-"++x 466 _ -> "" 467 where 468 c = maybe "" (atDef "" fields) (inField rules) 469 d = maybe "" (atDef "" fields) (outField rules) 470 471 tests_Hledger_Cli_Convert = TestList (test_parser ++ test_description_parsing) 472 473 test_description_parsing = [ 474 "description-field 1" ~: assertParseDescription "description-field 1\n" [FormatField False Nothing Nothing (FieldNo 1)] 475 , "description-field 1 " ~: assertParseDescription "description-field 1 \n" [FormatField False Nothing Nothing (FieldNo 1)] 476 , "description-field %(1)" ~: assertParseDescription "description-field %(1)\n" [FormatField False Nothing Nothing (FieldNo 1)] 477 , "description-field %(1)/$(2)" ~: assertParseDescription "description-field %(1)/%(2)\n" [ 478 FormatField False Nothing Nothing (FieldNo 1) 479 , FormatLiteral "/" 480 , FormatField False Nothing Nothing (FieldNo 2) 481 ] 482 ] 483 where 484 assertParseDescription string expected = do assertParseEqual (parseDescription string) (nullrules {descriptionField = expected}) 485 parseDescription :: String -> Either ParseError CsvRules 486 parseDescription x = runParser descriptionfieldWrapper nullrules "(unknown)" x 487 descriptionfieldWrapper :: GenParser Char CsvRules CsvRules 488 descriptionfieldWrapper = do 489 descriptionfield 490 r <- getState 491 return r 492 493 test_parser = [ 494 495 "convert rules parsing: empty file" ~: do 496 -- let assertMixedAmountParse parseresult mixedamount = 497 -- (either (const "parse error") showMixedAmountDebug parseresult) ~?= (showMixedAmountDebug mixedamount) 498 assertParseEqual (parseCsvRules "unknown" "") nullrules 499 500 ,"convert rules parsing: accountrule" ~: do 501 assertParseEqual (parseWithCtx nullrules accountrule "A\na\n") -- leading blank line required 502 ([("A",Nothing)], "a") 503 504 ,"convert rules parsing: trailing comments" ~: do 505 assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n") 506 507 ,"convert rules parsing: trailing blank lines" ~: do 508 assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n \n") 509 510 -- not supported 511 -- ,"convert rules parsing: no final newline" ~: do 512 -- assertParse (parseWithCtx nullrules csvrulesfile "A\na") 513 -- assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#") 514 -- assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n ") 515 516 -- (nullrules{ 517 -- -- dateField=Maybe FieldPosition, 518 -- -- statusField=Maybe FieldPosition, 519 -- -- codeField=Maybe FieldPosition, 520 -- -- descriptionField=Maybe FieldPosition, 521 -- -- amountField=Maybe FieldPosition, 522 -- -- currencyField=Maybe FieldPosition, 523 -- -- baseCurrency=Maybe String, 524 -- -- baseAccount=AccountName, 525 -- accountRules=[ 526 -- ([("A",Nothing)], "a") 527 -- ] 528 -- }) 529 530 ]