Commit 1932f711 authored by Henrik Tramberend's avatar Henrik Tramberend
Browse files

Better meta file handling

parent c555ed1d
---
rsync-destination:
host: tramberend@tramberend.beuth-hochschule.de
path: /var/www/html/internal/lehre/16-ws/bmi-cgg
sometext: Some random text.
course: Real-Time Rendering
semester: Winter 2016
......@@ -10,5 +6,5 @@ structured:
- Second
- Third
date: 14.5.2016
resolver: 'Meta Data Test'
csl: chicago-author-date.csl
...
rsync-destination:
host: tramberend@tramberend.beuth-hochschule.de
path: /var/www/html/internal/lehre/16-ws/bmi-cgg
......@@ -15,7 +15,7 @@ cabal-version: >=1.10
library
hs-source-dirs: src
exposed-modules: Test, Embed, Context, Utilities, Filter
exposed-modules: Pandoc, Test, Embed, Context, Utilities, Filter
build-depends: base
, aeson
, pandoc-types
......@@ -35,6 +35,7 @@ library
, filepath
, Glob
, pandoc
, pureMD5
, yaml
, mustache
, unordered-containers
......@@ -44,7 +45,9 @@ library
, network-uri
, HTTP
, http-conduit
, http-types
, highlighting-kate
, multimap
default-language: Haskell2010
executable decker
......@@ -85,85 +88,85 @@ executable tester
, random-shuffle
default-language: Haskell2010
executable include-pandoc-filter
hs-source-dirs: app
main-is: include.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base
, slides
, pandoc-types
default-language: Haskell2010
-- executable include-pandoc-filter
-- hs-source-dirs: app
-- main-is: include.hs
-- ghc-options: -threaded -rtsopts -with-rtsopts=-N
-- build-depends: base
-- , slides
-- , pandoc-types
-- default-language: Haskell2010
executable dot-pandoc-filter
hs-source-dirs: app
main-is: dot.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base
, slides
, pandoc-types
, base64-bytestring
, bytestring
, process
, blaze-markup
, blaze-html
, split
, pureMD5
default-language: Haskell2010
-- executable dot-pandoc-filter
-- hs-source-dirs: app
-- main-is: dot.hs
-- ghc-options: -threaded -rtsopts -with-rtsopts=-N
-- build-depends: base
-- , slides
-- , pandoc-types
-- , base64-bytestring
-- , bytestring
-- , process
-- , blaze-markup
-- , blaze-html
-- , split
-- , pureMD5
-- default-language: Haskell2010
executable gnuplot-pandoc-filter
hs-source-dirs: app
main-is: Gnuplot.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base
, slides
, pandoc-types
, base64-bytestring
, bytestring
, process
, blaze-markup
, blaze-html
, split
, pureMD5
, directory
default-language: Haskell2010
-- executable gnuplot-pandoc-filter
-- hs-source-dirs: app
-- main-is: Gnuplot.hs
-- ghc-options: -threaded -rtsopts -with-rtsopts=-N
-- build-depends: base
-- , slides
-- , pandoc-types
-- , base64-bytestring
-- , bytestring
-- , process
-- , blaze-markup
-- , blaze-html
-- , split
-- , pureMD5
-- , directory
-- default-language: Haskell2010
executable macros-pandoc-filter
hs-source-dirs: app
main-is: macros.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base
, slides
, pandoc-types
, containers
, split
, data-default
, blaze-markup
, blaze-html
default-language: Haskell2010
-- executable macros-pandoc-filter
-- hs-source-dirs: app
-- main-is: macros.hs
-- ghc-options: -threaded -rtsopts -with-rtsopts=-N
-- build-depends: base
-- , slides
-- , pandoc-types
-- , containers
-- , split
-- , data-default
-- , blaze-markup
-- , blaze-html
-- default-language: Haskell2010
executable slideset-pandoc-filter
hs-source-dirs: app
main-is: slideset-filter.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base
, slides
, pandoc-types
, containers
, split
, data-default
default-language: Haskell2010
-- executable slideset-pandoc-filter
-- hs-source-dirs: app
-- main-is: slideset-filter.hs
-- ghc-options: -threaded -rtsopts -with-rtsopts=-N
-- build-depends: base
-- , slides
-- , pandoc-types
-- , containers
-- , split
-- , data-default
-- default-language: Haskell2010
executable handout-pandoc-filter
hs-source-dirs: app
main-is: handout.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base
, slides
, pandoc-types
, containers
, split
, data-default
default-language: Haskell2010
-- executable handout-pandoc-filter
-- hs-source-dirs: app
-- main-is: handout.hs
-- ghc-options: -threaded -rtsopts -with-rtsopts=-N
-- build-depends: base
-- , slides
-- , pandoc-types
-- , containers
-- , split
-- , data-default
-- default-language: Haskell2010
test-suite slides-test
type: exitcode-stdio-1.0
......@@ -171,6 +174,14 @@ test-suite slides-test
main-is: spec.hs
build-depends: base
, slides
, hspec
, filepath
, pandoc
, Glob
, yaml
, containers
, unordered-containers
, text
ghc-options: -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010
......
-- | Generally useful functions on pansoc data structures. Some in the IO monad.
module Pandoc () where
module Pandoc (isCacheableURI,adjustLocalUrl,cacheRemoteFile,Pandoc.cacheRemoteImages,Pandoc.readMetaData) where
import Control.Exception
import Control.Monad
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Data.HashMap.Strict as H
import Data.List
import Data.Maybe
import qualified Data.Map as M
import qualified Data.MultiMap as MM
import Data.Digest.Pure.MD5
import qualified Data.Yaml as Y
import Network.HTTP.Conduit
import Network.HTTP.Simple
import Network.HTTP.Types.Status
import Network.URI
import System.Directory
import System.FilePath.Posix
import System.Posix.Files
import Text.Pandoc
import Text.Pandoc.Walk
import Utilities
import Debug.Trace
-- cacheRemoteAdjustLocalImages
isLocalURI :: String -> Bool
isLocalURI url = isNothing $ parseURI url
isRemoteURI :: String -> Bool
isRemoteURI = not . isLocalURI
isCacheableURI :: String -> Bool
isCacheableURI url =
case parseURI url of
Just uri -> uriScheme uri `elem` ["http:","https:"]
Nothing -> False
-- | Walks over all images in a Pandoc document and transforms image URLs like
-- this: 1. Remote URLs are not transformed. 2. Absolute URLs are intepreted
-- relative to the project root directory. 3. Relative URLs are intepreted
-- relative to the containing document.
adjustImageUrls :: FilePath -> FilePath -> Pandoc -> Pandoc
adjustImageUrls projectDir baseDir pandoc = walk adjust pandoc
where adjust (Image attr inlines (url,title)) =
(Image attr inlines (adjustLocalUrl projectDir baseDir url,title))
adjust other = other
adjustLocalUrl :: FilePath -> FilePath -> FilePath -> FilePath
adjustLocalUrl root base url
| isLocalURI url =
if isAbsolute url
then root </> makeRelative "/" url
else base </> url
adjustLocalUrl _ _ url = url
cacheRemoteImages :: FilePath -> Pandoc -> IO Pandoc
cacheRemoteImages cacheDir pandoc = walkM cacheRemoteImage pandoc
where cacheRemoteImage (Image attr inlines (url,title)) =
do cachedFile <- cacheRemoteFile cacheDir url
return (Image attr inlines (cachedFile,title))
cacheRemoteFile :: FilePath -> String -> IO FilePath
cacheRemoteFile cacheDir url
| isCacheableURI url =
do let cacheFile = cacheDir </> hashURI url
exists <- fileExist cacheFile
if exists
then return cacheFile
else do content <- downloadUrl url
createDirectoryIfMissing True cacheDir
L.writeFile cacheFile content
return cacheFile
cacheRemoteFile _ url = return url
clearCachedFile :: FilePath -> String -> IO ()
clearCachedFile cacheDir url
| isCacheableURI url =
do let cacheFile = cacheDir </> hashURI url
exists <- fileExist cacheFile
when exists $ removeFile cacheFile
clearCachedFile _ _ = return ()
downloadUrl :: String -> IO L.ByteString
downloadUrl url =
do request <- parseRequest url
result <- httpLBS request
let status = getResponseStatus result
if status == ok200
then return $ getResponseBody result
else throw $
HttpException $
"Cannot download " ++ url ++ ": status: " ++ show status
hashURI :: String -> String
hashURI uri = (show $ md5 $ L.pack uri) <.> takeExtension uri
type MetaData = M.Map FilePath Y.Value
readMetaData :: [FilePath] -> IO MetaData
readMetaData metaFiles =
do canonMetaFiles <- mapM canonicalizePath metaFiles
aDataList <- mapM decodeYaml canonMetaFiles
let joined = M.map joinHorizontally $ MM.toMap $ MM.fromList aDataList
return $
M.mapWithKey (joinVertically joined)
joined
where decodeYaml
:: FilePath -> IO (FilePath,Y.Value)
decodeYaml file =
do result <- Y.decodeFileEither file
case result of
Right object@(Y.Object _) -> return (takeDirectory file,object)
Right _ ->
throw $
YamlException $
"Top-level meta value must be an object: " ++ file
Left exception -> throw exception
joinHorizontally :: [Y.Value] -> Y.Value
joinHorizontally = foldl1 joinMeta
joinVertically
:: MetaData -> FilePath -> Y.Value -> Y.Value
joinVertically meta dir ignore =
if not $ equalFilePath dir "/"
then let up =
joinVertically meta
(takeDirectory dir)
ignore
in maybe up
(joinMeta up)
(M.lookup dir meta)
else Y.Object (H.fromList [])
joinMeta :: Y.Value -> Y.Value -> Y.Value
joinMeta (Y.Object old) (Y.Object new) = Y.Object (H.union new old)
joinMeta _ _ = throw $ YamlException "Can only join YAML objects."
......@@ -553,6 +553,7 @@ data DeckerException
= MustacheException String
| PandocException String
| YamlException String
| HttpException String
| RsyncUrlException
| DecktapeException String
deriving Typeable
......
{-# LANGUAGE OverloadedStrings #-}
import Test.Hspec
import Lib
import Data.Maybe
import Data.Text
import Pandoc
import Text.Pandoc
import Utilities
import System.FilePath
import System.FilePath.Posix
import System.FilePath.Glob
import qualified Data.Yaml as Y
import qualified Data.Map as M
import qualified Data.HashMap.Strict as HM
main = putStrLn "Test"
main =
do projectDir <- calcProjectDirectory
--
let publicDir = projectDir </> "public"
let cacheDir = publicDir </> "cache"
let supportDir = publicDir </> "support"
--
metaFiles <- globDir1 (compile "**/*-meta.yaml") projectDir
putStrLn $ show metaFiles
--
hspec $
--
do describe "isCacheableURI" $
do it "returns True if URL has http: or https: protocol" $
do isCacheableURI "http://heise.de" `shouldBe` True
isCacheableURI "ftp://heise.de" `shouldBe` False
--
describe "adjustLocalUrl" $
do it "adjusts URL to be relative to the project root or the provided base directory" $
do adjustLocalUrl projectDir "base" "http://heise.de" `shouldBe`
"http://heise.de"
--
adjustLocalUrl projectDir "base" "/some/where" `shouldBe`
projectDir </>
"some/where"
--
adjustLocalUrl projectDir "base" "some/where" `shouldBe`
"base/some/where"
--
describe "cacheRemoteFile" $
it "Stores the data behind a URL locally, if possible. Return the local path to the cached file." $
do cacheRemoteFile cacheDir "https://tramberend.beuth-hochschule.de/img/htr-beuth.jpg" `shouldReturn`
cacheDir </>
"bc137c359488beadbb61589f7fe9e208.jpg"
cacheRemoteFile cacheDir "ftp://tramberend.beuth-hochschule.de/img/htr-beuth.jpg" `shouldReturn`
"ftp://tramberend.beuth-hochschule.de/img/htr-beuth.jpg"
cacheRemoteFile cacheDir "/img/htr-beuth.jpg" `shouldReturn`
"/img/htr-beuth.jpg"
cacheRemoteFile cacheDir "img/htr-beuth.jpg" `shouldReturn`
"img/htr-beuth.jpg"
cacheRemoteFile cacheDir
"https://tramberend.beuth-hochschule.de/img/htr-beuth.jpg.wurst" `shouldThrow`
anyException
--
describe "cacheRemoteImages" $
it "Replaces all remote images in the pandoc document with locally caches copies." $
do Pandoc.cacheRemoteImages
cacheDir
(Pandoc nullMeta
[(Para [Image nullAttr
[]
("https://tramberend.beuth-hochschule.de/img/htr-beuth.jpg"
,"")])]) `shouldReturn`
(Pandoc nullMeta
[(Para [Image nullAttr
[]
(cacheDir </>
"bc137c359488beadbb61589f7fe9e208.jpg"
,"")])])
Pandoc.cacheRemoteImages
cacheDir
(Pandoc nullMeta
[(Para [Image nullAttr
[]
("https://tramberend.beuth-hochschule.de/img/htr-beuth.jpg.wurst"
,"")])]) `shouldThrow`
anyException
--
describe "readMetaData" $
it "Collects the projects meta data from all .yaml files. Combines the data hierarchically for each directory." $
do Pandoc.readMetaData metaFiles `shouldReturn`
M.fromList
[("/Users/henrik/workspace/decker/resource/example"
,Y.Object (HM.fromList
[("semester",Y.String "Winter 2016")
,("structured"
,Y.array [Y.String "First"
,Y.String "Second"
,Y.String "Third"])
,("date",Y.String "14.5.2016")
,("csl",Y.String "chicago-author-date.csl")
,("course",Y.String "Real-Time Rendering")
,("resolver",Y.String "Meta Data Test")
,("sometext",Y.String "Some random text.")]))]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment