Parsec 模块应用,处理 lighttpd log 文件

注意,此程序只是演示 parsec 的处理,并不是实际的处理程序,因为内存占用极大,因此不实用。

{-
LogWatch
author: Albert Lee
-}

module Main where

import System.IO
import System.Environment
import System.IO.Error hiding (try)
import Data.List
import qualified IO 
import Text.ParserCombinators.Parsec
import Control.Monad

type Ip = String
type Host = String
type Time = String
type Url = String
type Client = String
type Status = Int
type Req = (String, String, String)
type Size = Integer
data AccessInfo = AccessInfo {
                              ip :: Ip, 
                              host :: Host, 
                              time :: Time, 
                              url :: Url, 
                              client :: Client, 
                              req :: Req, 
                              status :: Status, 
                              size :: Size
                             } deriving (Eq, Show)

parseIP :: Parser Ip
parseIP = many1 (digit <|> char '.')

parseHost :: Parser Host
parseHost = many1 (letter <|> char '.' <|> digit)

parseTime :: Parser Time
parseTime = do
  char '['
  t <- many1 (letter <|> digit <|> oneOf "/:+-" <|> space)
  char ']'
  return t

parseReq :: Parser Req
parseReq = do
  char '"'
  cmd <- many1 letter
  space
  uri <- many1 (oneOf "/.~_" <|> letter <|> digit)
  space
  protocol <- many1 (letter <|> oneOf "/." <|> digit)
  char '"'
  return (cmd, uri, protocol)

parseStatus :: Parser Status
parseStatus = liftM read $ many1 digit

parseSize :: Parser Size
parseSize = liftM read $ many1 digit

parseUrl :: Parser Url
parseUrl = do
  char '"'
  manyTill anyChar (try (char '"'))

parseClient :: Parser Client
parseClient = do
  char '"'
  manyTill anyChar (try (char '"'))

logLine :: Parser AccessInfo
logLine = do 
  let ret = []
  ip <- parseIP
  space
  host <- parseHost
  space; char '-'; space
  time <- parseTime
  space
  req <- parseReq
  space
  status <- parseStatus
  space
  size <- parseSize
  space
  url <- parseUrl
  space
  client <- parseClient
  return $ AccessInfo ip host time url client req status size
         
procLogFile :: String -> IO()
procLogFile fn = do
  IO.bracket (openFile fn ReadMode) hClose
        (\h -> do contents <- hGetContents h
                  let
                      ls = lines contents 
                      rs = [parse logLine "" x | x <- ls ]
                      rs1 = head rs
                  print $ sum302 rs 
        )
sum302 ::[Either ParseError AccessInfo] -> Int
sum302 rs = sum $ map (\x-> either (\e->0)(\i->if status i == 302 then 1 else 0) x) rs

main = do
  args <- getArgs 
  procLogFile $ head args