Filter.hs 15.4 KB
Newer Older
1
{-- Author: Henrik Tramberend <henrik@tramberend.de> --}
Henrik Tramberend's avatar
Henrik Tramberend committed
2
module Filter
3
4
5
6
  ( Layout(..)
  , OutputFormat(..)
  , Disposition(..)
  , processPandoc
7
8
  , hasAttrib
  , blockClasses
9
  , makeSlides
10
  , makeBoxes
11
12
13
14
  , useCachedImages
  , escapeToFilePath
  , cachePandocImages
  , extractLocalImagePathes
15
  , renderMediaTags
16
17
  , transformImageSize
  , lazyLoadImage
18
19
20
  , iframeExtensions
  , audioExtensions
  , videoExtensions
21
  ) where
Henrik Tramberend's avatar
Henrik Tramberend committed
22

23
24
import Common
import Control.Exception
25
import Control.Monad.State
26
import qualified Data.ByteString.Lazy.Char8 as L8
Henrik Tramberend's avatar
Henrik Tramberend committed
27
import Data.Default ()
28
import Data.List
Henrik Tramberend's avatar
Henrik Tramberend committed
29
import Data.List.Split
30
31
import Data.Maybe
import Development.Shake (Action)
32
import Network.HTTP.Conduit hiding (InternalException)
33
import Network.HTTP.Simple
34
import qualified Network.URI as U
35
import Network.URI (parseURI, uriScheme)
36
37
import System.Directory
import System.FilePath
Henrik Tramberend's avatar
Henrik Tramberend committed
38
39
import Text.Blaze (customAttribute)
import Text.Blaze.Html.Renderer.String
40
import Text.Blaze.Html5 as H
41
42
       ((!), audio, iframe, iframe, img, stringTag, toValue, video)
import Text.Blaze.Html5.Attributes as A (alt, class_, id, title)
43
import Text.Pandoc
Henrik Tramberend's avatar
Henrik Tramberend committed
44
import Text.Pandoc.Definition ()
45
import Text.Pandoc.Shared
Henrik Tramberend's avatar
Henrik Tramberend committed
46
47
48
import Text.Pandoc.Walk
import Text.Read

49
processPandoc ::
50
51
52
53
54
55
56
57
     (Pandoc -> Decker Pandoc)
  -> FilePath
  -> Disposition
  -> Provisioning
  -> Pandoc
  -> Action Pandoc
processPandoc transform base disp prov pandoc =
  evalStateT (transform pandoc) (DeckerState base disp prov 0 [] [])
Henrik Tramberend's avatar
Henrik Tramberend committed
58
59

isSlideHeader :: Block -> Bool
60
isSlideHeader (Header 1 _ _) = True
61
isSlideHeader HorizontalRule = True
Henrik Tramberend's avatar
Henrik Tramberend committed
62
63
64
isSlideHeader _ = False

isBoxDelim :: Block -> Bool
65
isBoxDelim (Header 2 _ _) = True
Henrik Tramberend's avatar
Henrik Tramberend committed
66
67
isBoxDelim _ = False

68
hasClass :: String -> Block -> Bool
69
hasClass which = elem which . blockClasses
70
71

hasAnyClass :: [String] -> Block -> Bool
72
73
74
hasAnyClass which = isJust . firstClass which

firstClass :: [String] -> Block -> Maybe String
Henrik Tramberend's avatar
Henrik Tramberend committed
75
firstClass which block = listToMaybe $ filter (`hasClass` block) which
76

77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
-- | Slide layouts are rows of one ore more columns.
data RowLayout = RowLayout
  { lname :: String
  , rows :: [Row]
  } deriving (Eq, Show)

-- | A row consists of one or more column. 
data Row
  = SingleColumn String
  | MultiColumn [String]
  deriving (Eq, Show)

type Area = [Block]

type AreaMap = [(String, Area)]

93
rowLayouts :: [RowLayout]
94
95
96
rowLayouts =
  [ RowLayout
      "columns"
97
98
99
100
      [ SingleColumn "top"
      , MultiColumn ["left", "center", "right"]
      , SingleColumn "bottom"
      ]
101
102
  ]

103
rowAreas :: Row -> [String]
104
105
106
rowAreas (SingleColumn area) = [area]
rowAreas (MultiColumn areas) = areas

107
108
layoutAreas :: RowLayout -> [String]
layoutAreas l = concatMap rowAreas $ rows l
109
110
111

hasRowLayout :: Block -> Maybe RowLayout
hasRowLayout block =
112
  hasAttrib "layout" block >>= (\l -> find ((==) l . lname) rowLayouts)
113
114

renderRow :: AreaMap -> Row -> Maybe Block
115
116
117
renderRow areaMap (SingleColumn area) =
  lookup area areaMap >>= Just . Div ("", ["single-column-row"], [])
renderRow areaMap (MultiColumn areas) =
118
  Just $
119
120
  Div ("", ["multi-column-row", "multi-column-row-" ++ show (length areas)], []) $
  mapMaybe renderArea (zip [1 ..] areas)
121
122
123
124
125
126
  where
    renderArea (i, area) = lookup area areaMap >>= Just . renderColumn . (i, )

renderColumn :: (Int, [Block]) -> Block
renderColumn (i, blocks) =
  let grow =
Henrik Tramberend's avatar
Henrik Tramberend committed
127
        fromMaybe (1 :: Int) $ lookup "grow" (blockKeyvals blocks) >>= readMaybe
128
129
130
  in Div
       ( ""
       , ["grow-" ++ show grow, "column", "column-" ++ show i]
Henrik Tramberend's avatar
Henrik Tramberend committed
131
       , blockKeyvals blocks)
132
133
134
135
136
137
138
139
140
       blocks

blockKeyvals :: [Block] -> [(String, String)]
blockKeyvals (first:_) =
  let (_, _, kv) = blockAttribs first
  in kv
blockKeyvals [] = []

renderLayout :: AreaMap -> RowLayout -> [Block]
Henrik Tramberend's avatar
Henrik Tramberend committed
141
renderLayout areaMap l = mapMaybe (renderRow areaMap) (rows l)
142
143
144
145
146
147

slideAreas :: [String] -> [Block] -> AreaMap
slideAreas names blocks =
  mapMaybe (\area -> firstClass names (head area) >>= Just . (, area)) $
  filter (not . null) $ split (keepDelimsL $ whenElt (hasAnyClass names)) blocks

148
149
layoutSlides :: Slide -> Slide
layoutSlides slide@(header, body) =
150
  case hasRowLayout header of
151
152
    Just l ->
      let names = layoutAreas l
153
          areas = slideAreas names body
154
      in (header, renderLayout areas l)
155
156
    Nothing -> slide

157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
hasAttrib :: String -> Block -> Maybe String
hasAttrib which (Div (_, _, keyvals) _) = lookup which keyvals
hasAttrib which (Header 1 (_, _, keyvals) _) = lookup which keyvals
hasAttrib which (CodeBlock (_, _, keyvals) _) = lookup which keyvals
hasAttrib which (Para [Image (_, _, keyvals) _ _]) = lookup which keyvals
hasAttrib _ _ = Nothing

blockClasses :: Block -> [String]
blockClasses (Div (_, classes, _) _) = classes
blockClasses (Header 1 (_, classes, _) _) = classes
blockClasses (CodeBlock (_, classes, _) _) = classes
blockClasses (Para [Image (_, classes, _) _ _]) = classes
blockClasses _ = []

blockAttribs :: Block -> (String, [String], [(String, String)])
blockAttribs (Div attribs _) = attribs
blockAttribs (Header 1 attribs _) = attribs
blockAttribs (CodeBlock attribs _) = attribs
blockAttribs (Para [Image attribs _ _]) = attribs
blockAttribs _ = ("", [], [])

178
-- | Split join columns with CSS3. Must be performed after `wrapBoxes`.
179
splitJoinColumns :: Slide -> Slide
180
splitJoinColumns (header, body) = (header, concatMap wrapRow rowBlocks)
181
  where
182
183
    rowBlocks =
      split (keepDelimsL $ whenElt (hasAnyClass ["split", "join"])) body
184
185
186
    wrapRow row@(first:_)
      | hasClass "split" first = [Div ("", ["css-columns"], []) row]
    wrapRow row = row
Henrik Tramberend's avatar
Henrik Tramberend committed
187
188

-- All fragment related classes from reveal.js have to be moved to the enclosing
189
190
-- DIV element. Otherwise to many fragments are produced.
fragmentRelated :: [String]
191
fragmentRelated =
192
193
194
195
196
197
198
199
200
201
202
203
  [ "fragment"
  , "grow"
  , "shrink"
  , "roll-in"
  , "fade-in"
  , "fade-out"
  , "current-visible"
  , "highlight-current-blue"
  , "highlight-red"
  , "highlight-green"
  , "highlight-blu"
  ]
Henrik Tramberend's avatar
Henrik Tramberend committed
204
205
206
207

deFragment :: [String] -> [String]
deFragment = filter (`notElem` fragmentRelated)

208
allImages :: Inline -> [Inline]
209
210
211
allImages image@Image {} = [image]
allImages _ = []

212
zapImages :: Inline -> Inline
213
214
215
216
217
zapImages Image {} = Space
zapImages inline = inline

-- Transform inline image or video elements within the header line with
-- background attributes of the respective section. 
218
setSlideBackground :: Slide -> Slide
Henrik Tramberend's avatar
Henrik Tramberend committed
219
setSlideBackground slide@(Header 1 (headerId, headerClasses, headerAttributes) inlines, slideBody) =
220
221
  case query allImages inlines of
    Image (_, imageClasses, imageAttributes) _ (imageSrc, _):_ ->
222
223
224
225
226
227
228
229
      ( Header
          1
          ( headerId
          , headerClasses ++ imageClasses
          , srcAttribute imageSrc :
            headerAttributes ++ map transform imageAttributes)
          (walk zapImages inlines)
      , slideBody)
230
    _ -> slide
231
  where
232
233
234
235
236
237
    transform ("size", value) = ("data-background-size", value)
    transform ("position", value) = ("data-background-position", value)
    transform ("repeat", value) = ("data-background-repeat", value)
    transform ("loop", value) = ("data-background-video-loop", value)
    transform ("muted", value) = ("data-background-video-muted", value)
    transform ("color", value) = ("data-background-color", value)
238
    transform ("interactive", value) = ("data-background-interactive", value)
239
240
241
242
243
    transform kv = kv
    srcAttribute src =
      case classifyFilePath src of
        VideoMedia -> ("data-background-video", src)
        AudioMedia -> ("data-background-audio", src)
244
        IframeMedia -> ("data-background-iframe", src)
245
246
        ImageMedia -> ("data-background-image", src)
setSlideBackground slide = slide
247

248
-- | Wrap boxes around H2 headers and the following content. All attributes are
249
-- promoted from the H2 header to the enclosing DIV.
250
251
wrapBoxes :: Slide -> Slide
wrapBoxes (header, body) = (header, concatMap wrap boxes)
252
253
254
255
  where
    boxes = split (keepDelimsL $ whenElt isBoxDelim) body
    wrap (Header 2 (id_, cls, kvs) text:blocks) =
      [ Div
256
          ("", "box" : cls, kvs)
257
258
259
          (Header 2 (id_, deFragment cls, kvs) text : blocks)
      ]
    wrap box = box
Henrik Tramberend's avatar
Henrik Tramberend committed
260

261
262
-- | Wrap H1 headers with class notes into a DIV and promote all header
-- attributes to the DIV.
263
wrapNoteRevealjs :: Slide -> Slide
264
wrapNoteRevealjs (header@(Header 1 (id_, cls, kvs) _), body)
265
  | "notes" `elem` cls = (Div (id_, cls, kvs) (header : body), [])
Henrik Tramberend's avatar
Henrik Tramberend committed
266
267
wrapNoteRevealjs slide = slide

268
type Slide = (Block, [Block])
Henrik Tramberend's avatar
Henrik Tramberend committed
269

270
271
272
273
274
-- | Map over all slides in a deck. A slide has always a header followed by zero
-- or more blocks.
mapSlides :: (Slide -> Slide) -> Pandoc -> Pandoc
mapSlides func (Pandoc meta blocks) =
  Pandoc meta (concatMap (prependHeader . func) slides)
275
  where
276
277
    slideBlocks = split (keepDelimsL $ whenElt isSlideHeader) blocks
    slides = map extractHeader $ filter (not . null) slideBlocks
278
    extractHeader (header@(Header 1 _ _):bs) = (header, bs)
Henrik Tramberend's avatar
Henrik Tramberend committed
279
    extractHeader (rule@HorizontalRule:bs) = (rule, bs)
280
281
    extractHeader slide =
      throw $
282
      PandocException $ "Error extracting slide header: \n" ++ show slide
283
    prependHeader (header, bs) = header : bs
Henrik Tramberend's avatar
Henrik Tramberend committed
284

285
286
287
makeSlides :: Pandoc -> Decker Pandoc
makeSlides pandoc = do
  disp <- gets disposition
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
  let chain =
        case disp of
          Disposition Deck Html ->
            layoutSlides .
            splitJoinColumns . setSlideBackground . wrapBoxes . wrapNoteRevealjs
                -- TODO: Maybe we need some handout specific structure
          Disposition Handout Html ->
            layoutSlides . splitJoinColumns . wrapBoxes . wrapNoteRevealjs
                -- TODO: Maybe we need some latex specific structure
          Disposition Handout Pdf -> Prelude.id
                -- TODO: Probably not much to do here
          Disposition Page Html -> Prelude.id
                -- TODO: Probably not much to do here
          Disposition Page Pdf -> Prelude.id
          Disposition Deck Pdf ->
            throw $
            InternalException "PDF slide decks via LaTeX are not supported"
  return $ mapSlides chain pandoc
Henrik Tramberend's avatar
Henrik Tramberend committed
306

307
308
309
makeBoxes :: Pandoc -> Pandoc
makeBoxes = walk (mapSlides wrapBoxes)

Henrik Tramberend's avatar
Henrik Tramberend committed
310
311
escapeToFilePath :: String -> FilePath
escapeToFilePath = map repl
312
313
314
315
316
  where
    repl c =
      if c `elem` [':', '!', '/']
        then '|'
        else c
Henrik Tramberend's avatar
Henrik Tramberend committed
317

318
useCachedImages :: FilePath -> Inline -> IO Inline
319
useCachedImages cacheDir image@(Image (ident, cls, values) inlines (url, imgTitle)) = do
320
321
322
  let cached = cacheDir </> escapeToFilePath url
  exists <- doesFileExist cached
  if exists
323
324
325
    then return
           (Image (ident, "cached" : cls, values) inlines (cached, imgTitle))
    else return image
326
327
328
useCachedImages _ inline = return inline

localImagePath :: Inline -> [FilePath]
329
330
331
332
localImagePath (Image _ _ (url, _)) =
  if isHttpUri url
    then []
    else [url]
333
334
335
localImagePath _ = []

extractLocalImagePathes :: Pandoc -> [FilePath]
336
extractLocalImagePathes = Text.Pandoc.Walk.query localImagePath
337
338
339
340

isHttpUri :: String -> Bool
isHttpUri url =
  case parseURI url of
341
    Just uri -> uriScheme uri `elem` ["http:", "https:"]
342
    Nothing -> False
Henrik Tramberend's avatar
Henrik Tramberend committed
343

344
cachePandocImages :: FilePath -> Inline -> IO Inline
345
cachePandocImages base image@(Image _ _ (url, _))
346
347
  | isHttpUri url = do
    cacheImageIO url base
348
349
    return image
  | otherwise = return image
350
cachePandocImages _ inline = return inline
Henrik Tramberend's avatar
Henrik Tramberend committed
351

352
-- | Downloads the image behind the URI and saves it locally. Returns the path of
353
-- the cached file relative to the base directory.
354
355
356
357
358
359
360
361
cacheImageIO :: String -> FilePath -> IO ()
cacheImageIO uri cacheDir = do
  request <- parseRequest uri
  result <- httpLBS request
  let body = getResponseBody result
  let cacheFile = cacheDir </> escapeToFilePath uri
  createDirectoryIfMissing True cacheDir
  L8.writeFile cacheFile body
362

363
364
365
366
renderMediaTags :: Pandoc -> Decker Pandoc
renderMediaTags pandoc = do
  disp <- gets disposition
  return $ walk (renderImageAudioVideoTag disp) pandoc
367
368
369

-- | File extensions that signify video content.
videoExtensions :: [String]
370
videoExtensions =
371
  [".mp4", ".m4v", ".webm", ".ogg", ".avi", ".dv", ".mp2", ".mov", ".qt"]
372

373
374
-- | File extensions that signify audio content.
audioExtensions :: [String]
375
audioExtensions = [".m4a", ".mp3", ".ogg", ".wav"]
376

377
378
379
380
-- | File extensions that signify iframe content.
iframeExtensions :: [String]
iframeExtensions = [".html", ".html", ".pdf"]

381
uriPathExtension :: String -> String
382
383
384
uriPathExtension reference =
  case U.parseRelativeReference reference of
    Nothing -> takeExtension reference
385
386
    Just uri -> takeExtension (U.uriPath uri)

387
388
classifyFilePath :: FilePath -> MediaType
classifyFilePath name =
389
  case uriPathExtension name of
390
391
392
393
    ext
      | ext `elem` videoExtensions -> VideoMedia
    ext
      | ext `elem` audioExtensions -> AudioMedia
394
395
    ext
      | ext `elem` iframeExtensions -> IframeMedia
396
397
    _ -> ImageMedia

398
399
-- Renders an image with a video reference to a video tag in raw HTML. Faithfully
-- transfers attributes to the video tag.
400
renderImageAudioVideoTag :: Disposition -> Inline -> Inline
401
renderImageAudioVideoTag disp (Image (ident, cls, values) inlines (url, tit)) =
402
  RawInline (Format "html") (renderHtml imageVideoTag)
403
  where
404
    imageVideoTag =
405
406
407
408
409
410
411
412
      if "iframe" `elem` cls
        then mediaTag (iframe "Browser does not support iframe.")
        else case classifyFilePath url of
               VideoMedia -> mediaTag (video "Browser does not support video.")
               AudioMedia -> mediaTag (audio "Browser does not support audio.")
               IframeMedia ->
                 mediaTag (iframe "Browser does not support iframe.")
               ImageMedia -> mediaTag img
413
414
    appendAttr element (key, value) =
      element ! customAttribute (stringTag key) (toValue value)
415
416
417
418
    mediaTag tag =
      ifNotEmpty A.id ident $
      ifNotEmpty class_ (unwords cls) $
      ifNotEmpty alt (stringify inlines) $
Henrik Tramberend's avatar
Henrik Tramberend committed
419
      ifNotEmpty title tit $ foldl appendAttr tag transformedValues
420
421
422
423
424
    ifNotEmpty attr value element =
      if value == ""
        then element
        else element ! attr (toValue value)
    srcAttr =
425
      if disp == Disposition Deck Html
426
427
428
429
        then "data-src"
        else "src"
    transformedValues = (lazyLoad . transformImageSize) values
    lazyLoad vs = (srcAttr, url) : vs
430
renderImageAudioVideoTag _ inline = inline
431
432
433
434
435
436
437
438
439
440
441
442

-- | Mimic pandoc for handling the 'width' and 'height' attributes of images.
-- That is, transfer 'width' and 'height' attribute values to css style values
-- and add them to the 'style' attribute value.
transformImageSize :: [(String, String)] -> [(String, String)]
transformImageSize attributes =
  let style :: [String]
      style =
        delete "" $
        split (dropDelims $ oneOf ";") $
        fromMaybe "" $ snd <$> find (\(k, _) -> k == "style") attributes
      unstyled :: [(String, String)]
443
      unstyled = filter (\(k, _) -> k /= "style") attributes
444
      unsized =
445
446
        filter (\(k, _) -> k /= "width") $
        filter (\(k, _) -> k /= "height") unstyled
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
      size =
        ( snd <$> find (\(k, _) -> k == "width") unstyled
        , snd <$> find (\(k, _) -> k == "height") unstyled)
      sizeStyle =
        case size of
          (Just w, Just h) -> ["width:" ++ w, "height:" ++ h]
          (Just w, Nothing) -> ["width:" ++ w, "height:auto"]
          (Nothing, Just h) -> ["width:auto", "height:" ++ h]
          (Nothing, Nothing) -> []
      css = style ++ sizeStyle
      styleAttr = ("style", intercalate ";" $ reverse $ "" : css)
  in if null css
       then unstyled
       else styleAttr : unsized

-- | Moves the `src` attribute to `data-src` to enable reveal.js lazy loading.
lazyLoadImage :: Inline -> IO Inline
lazyLoadImage (Image (ident, cls, values) inlines (url, tit)) = do
  let kvs = ("data-src", url) : [kv | kv <- values, "data-src" /= fst kv]
  return (Image (ident, cls, kvs) inlines ("", tit))
lazyLoadImage inline = return inline