[various improvements including CSV output Ganesh Sittampalam **20100405173636 Ignore-this: 5835da92c1ed871952130ec9cae19f ] hunk ./CaseList.hs 1 -{-# LANGUAGE ViewPatterns, PatternGuards, NoMonomorphismRestriction #-} +{-# LANGUAGE ViewPatterns, PatternGuards, ScopedTypeVariables, NoMonomorphismRestriction #-} hunk ./CaseList.hs 6 +import Data.Function hunk ./CaseList.hs 9 +import Text.CSV hunk ./CaseList.hs 12 +import Text.Regex hunk ./CaseList.hs 33 +getVal key t = lookup key t >>= maybeRead + +maybeRead str = case reads str of + [(v, "")] -> Just v + _ -> Nothing + hunk ./CaseList.hs 42 +hasPrefix x str | x `isPrefixOf` str = Just (drop (length x) str) + | otherwise = Nothing + hunk ./CaseList.hs 55 -recognisePage (TagBranch "page" _ items) = tabulate <$> catMaybes <$> mapM recogniseItem items +recognisePage (TagBranch "page" _ items) = filter (not . tabulatedJunk) <$> tabulate <$> catMaybes <$> mapM recogniseItem items hunk ./CaseList.hs 59 -tabulate (((row, column), info):rest@(((row', column'), _):_)) - | column == column' = ((fst info || fst x, snd info ++ snd x):xs):xss - | row == row' = (info:x:xs):xss +tabulate (((row, column), (title, matchRegex dateStart -> Just [date, other])):rest) + = tabulate (((row, column), (title, date)):((row, column+1), (title, other)):rest) + +tabulate (((row :: Int, column :: Int), info@(title, text)):rest@(((row', column'), _):_)) + | row' <= row && row' >= row - 30 = (info:x:xs):xss + | column' >= column - 20 && title == title' = ((title', text ++ text'):xs):xss hunk ./CaseList.hs 66 - where (x:xs):xss = tabulate rest + where (x@(title', text'):xs):xss = tabulate rest hunk ./CaseList.hs 70 +tabulatedJunk [(True, hasPrefix "Freedom of Information and Environmental Information Regulations caseload snapshot in " -> Just _)] = True +tabulatedJunk [(False, hasPrefix "This report provides a snapshot of the Freedom of Information and Environmental Information Regulations cases " -> Just _)] = True +tabulatedJunk _ = False + + +dateStart = mkRegex "^([[:digit:]]{2}/[[:digit:]]{2}/[[:digit:]]{4}) (.*)$" + hunk ./CaseList.hs 78 - | Just top <- lookup "top" attrs, Just left <- lookup "left" attrs + | Just top <- getVal "top" attrs, Just left <- getVal "left" attrs hunk ./CaseList.hs 81 - | Just top <- lookup "top" attrs, Just left <- lookup "left" attrs + | Just top <- getVal "top" attrs, Just left <- getVal "left" attrs hunk ./CaseList.hs 90 - writeFile (stem ++ ".html") ("" ++ concat ["" ++ concat ["" | (title, text) <- row] ++ "" | row <- table] ++ "
" ++ (if title then "" ++ text ++ "" else text) ++ "
") + writeFile (stem ++ ".html") ("\n" ++ concat [" \n" ++ concat [" \n" | (title, text) <- row] ++ " \n" | row <- table] ++ "
" ++ (if title then "" ++ text ++ "" else text) ++ "
\n") + writeFile (stem ++ ".csv") (printCSV (map (map snd) table))