{- Reform, a decompiler for Z-machine story files. Copyright 2004 Ben Rudiak-Gould. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You can read the GNU General Public License at this URL: http://www.gnu.org/copyleft/gpl.html -} module Reform_grammar ( GrammarLine(..), GrammarToken(..), grammarType, verbs, verbNums, actionRoutines, preactionRoutines, DictWordTag(..), kbdInputCodes, dictionary, dictWordAt, isDictWord, getInfocomAdjectiveNum ) where import Reform_storyfile import Reform_symfile import Reform_zscii import Control.Monad (when,liftM,liftM2,replicateM) import Control.Monad.State (evalState) import Data.Array (Array,listArray,accumArray,(!),elems,bounds) import Data.Bits (shiftR,testBit,(.&.),(.|.)) import Ix (range) import Maybe (isJust,fromMaybe) import Numeric (showHex) -- words after verb action reverse data GrammarLine = GrammarLine [GrammarToken] Int Bool deriving Show verbs :: [([String],[GrammarLine])] verbNums :: Array Int [String] verbs = reverse (zip (reverse (elems verbNums)) grammarEntries) verbNums = accumArray (flip (:)) [] (0,255) [(n,x) | (x,_,tags) <- reverse (elems dictionary), DictVerb n <- tags] preps :: Array Int [String] preps = accumArray (flip (:)) [] (0,255) [(n,s) | (s,_,tags) <- elems dictionary, DictPrep n <- tags] grammarEntries :: [[GrammarLine]] actionRoutines, preactionRoutines :: [Int] (grammarEntries, actionRoutines, preactionRoutines) = evalFrom hdrStaticMemoryBaseAddr grammarParser data GrammarToken = ElementaryToken String | Preposition String | Attribute Int | ParseRoutine String Int | InfocomObject Int Int | Alternatives [GrammarToken] deriving (Show,Eq,Ord) {-----------} -- Heuristics to determine grammar table type taken from InfoDump, except -- that I disambiguate GV1 and GV2 in a slightly different way (maybe -- better, maybe worse, definitely easier to code). (grammarType,grammarParser,dictDataParser) = case compiler of Inform6 -> parserType_inform6 Inform5 -> ("Inform5", parseGV1 False, parseDictGV1) -- parseDictGV1??? Zilch | ver [6] -> ("InfocomV6", parseInfocomV6, parseDictInfocomV6) | otherwise -> let firstEntry = wordAt hdrStaticMemoryBaseAddr secondEntry = wordAt (hdrStaticMemoryBaseAddr+2) entryCount = byteAt firstEntry in if (secondEntry - firstEntry - 1) `div` entryCount <= 7 then ("InfocomVariable", parseInfocomVariable, parseDictInfocom) else ("InfocomFixed", parseInfocomFixed, parseDictInfocom) parserType_inform6 = let firstEntry = wordAt hdrStaticMemoryBaseAddr entryBlock = fromTo hdrStaticMemoryBaseAddr firstEntry allEntries = evalState (repeatUntilEmpty getUWord) entryBlock entrySizes = zipWith (-) (tail allEntries) allEntries isGV1 = all mightBeGV1 entrySizes isGV2 = all mightBeGV2 entrySizes mightBeGV1 n = n `mod` 8 == 1 mightBeGV2 n = n `mod` 3 == 1 in if null allEntries then ("No grammar", return ([],[],[]), parseDictGV1) else if isGV1 && isGV2 then error "Unable to disambiguate between Inform 6 GV1 and GV2" else if isGV1 then ("GV1", parseGV1 True, parseDictGV1) else if isGV2 then ("GV2", parseGV2, parseDictGV1) else error "Unable to determine Inform 6 grammar type" {---------------} parseInfocomFixed = do entries <- getGrammarTable parseInfocomFixedLine let maxActionIndex = maximum [i | GrammarLine _ i _ <- concat entries] skipZeroes actionRoutines <- replicateM (maxActionIndex+1) getUWord preactionRoutines <- replicateM (maxActionIndex+1) getUWord return (entries, actionRoutines, preactionRoutines) parseInfocomFixedLine = do objCount <- getUByte prepIndex1 <- getPrepIndex prepIndex2 <- getPrepIndex a <- getUByte b <- getUByte c <- getUByte d <- getUByte let object1 = InfocomObject a c object2 = InfocomObject b d actionNum <- getUByte let toks = case objCount of 0 -> [] 1 -> [object1] 2 -> object1 : maybePrep prepIndex2 [object2] _ -> error "Grammar table error: object count too high" return $ GrammarLine (maybePrep prepIndex1 toks) actionNum False getPrepIndex = do p <- getUByte return (if p < 0x80 then 0 else p) maybePrep 0 rest = rest maybePrep p rest = prepositionInt p : rest prepositionInt n = case preps ! n of [x] -> Preposition x xs -> Alternatives (map Preposition xs) {---------------} parseInfocomVariable = do entries <- getGrammarTable parseInfocomVariableLine let maxActionIndex = maximum [i | GrammarLine _ i _ <- concat entries] skipZeroes actionRoutines <- replicateM (maxActionIndex+1) getUWord preactionRoutines <- replicateM (maxActionIndex+1) getUWord return (entries, actionRoutines, preactionRoutines) parseInfocomVariableLine = do byte <- getUByte let objCount = byte `shiftR` 6 prepIndex1 = byte .&. 63 actionNum <- getUByte toks <- parseInfocomVariableLine' objCount return $ GrammarLine (maybePrep' prepIndex1 toks) actionNum False parseInfocomVariableLine' 3 = error "Grammar table error: object count too high" parseInfocomVariableLine' objCount = do if objCount == 0 then return [] else do object1 <- getInfocomObject if objCount == 1 then return [object1] else do byte <- getUByte let prepIndex2 = byte .&. 63 object2 <- getInfocomObject return (object1 : maybePrep' prepIndex2 [object2]) maybePrep' 0 rest = rest maybePrep' p rest = prepositionInt (p .|. 0xC0) : rest getInfocomObject = liftM2 InfocomObject getUByte getUByte {---------------} parseGV1 inform6 = do entries <- getGrammarTable parseGV1Line let maxActionIndex = maximum [i | GrammarLine _ i _ <- concat entries] maxParseIndex | inform6 = maximum [i | GrammarLine ts _ _ <- concat entries, ParseRoutine _ i <- ts] | otherwise = maxActionIndex skipZeroes actionRoutines <- replicateM (maxActionIndex+1) getUWord parseRoutines <- replicateM (maxParseIndex+1) getUWord return (map (map (fixupGV1Routines parseRoutines)) entries, actionRoutines, []) fixupGV1Routines parseRoutines (GrammarLine tokens action reverse) = GrammarLine (map helper tokens) action reverse where helper (ParseRoutine prefix n) = ParseRoutine prefix (parseRoutines !! n) helper (Alternatives tokens) = Alternatives (map helper tokens) -- probably unnecessary helper x = x parseGV1Line = do tokenCount <- getUByte tokenArea <- getBytes 6 let tokenBytes = evalState (getGV1Tokens tokenCount) tokenArea tokens = map parseGV1TokenByte tokenBytes actionNum <- getUByte return $ GrammarLine tokens actionNum False getGV1Tokens left = do eos <- isEOS if eos then return [] else do token <- getUByte if left == 0 && token < 176 then (if token == 0 then return [] else error "bad GV1 token count") else do rest <- getGV1Tokens (if token < 176 then (left-1) else left) return (token : rest) parseGV1TokenByte t | t < 9 = ElementaryToken (elementaryTokenTypes !! t) | t < 16 = error ("Illegal token value " ++ show t) | t < 48 = ParseRoutine "noun=" (t-16) -- FIXME: index into table | t < 80 = ParseRoutine "" (t-48) -- FIXME: index into table | t < 112 = ParseRoutine "scope=" (t-80) -- FIXME: index into table | t < 128 = error ("Illegal token value " ++ show t) | t < 176 = Attribute (t-128) | otherwise = prepositionInt t -- FIXME: index into table? {---------------} parseGV2 = do entries <- getGrammarTable parseGV2Line let maxActionIndex = maximum [i | GrammarLine _ i _ <- concat entries] skipZeroes actionRoutines <- replicateM (maxActionIndex+1) getUWord return (entries, actionRoutines, []) parseGV2Line = do actionWord <- getUWord let actionNum = actionWord .&. 0x3FF reverseParams = testBit actionWord 10 tokens <- parseGV2Tokens return $ GrammarLine (groupGV2Tokens tokens) actionNum reverseParams parseGV2Tokens = do tokenType <- getUByte if tokenType == 15 then return [] else do tokenData <- getUWord tokens <- parseGV2Tokens return ((tokenType .&. 0x30,parseGV2Token (tokenType .&. 15) tokenData) : tokens) parseGV2Token 1 d = ElementaryToken (elementaryTokenTypes !! d) parseGV2Token 2 d = Preposition (dictWordAt d) parseGV2Token 3 d = ParseRoutine "noun=" d parseGV2Token 4 d = Attribute d parseGV2Token 5 d = ParseRoutine "scope=" d parseGV2Token 6 d = ParseRoutine "" d elementaryTokenTypes = ["noun", "held", "multi", "multiheld", "multiexcept", "multiinside", "creature", "special", "number", "topic"] groupGV2Tokens ((0,x):rest) = x : groupGV2Tokens rest groupGV2Tokens ((32,x):rest) = Alternatives (x : map snd xs) : groupGV2Tokens rest' where (xs,rest') = break (\(n,_) -> not (testBit n 4)) rest groupGV2Tokens ((n,x):rest) = error (show n) groupGV2Tokens [] = [] {---------------} parseInfocomV6 = return ([],[],[]) -- FIXME {---------------} getGrammarTable :: StreamReader GrammarLine -> StreamReader [[GrammarLine]] -- The grammar table begins with a table of pointers to entries. getGrammarTable parser = do firstPtr <- getUWord pos <- getPos remainingPtrs <- getBytes (firstPtr - pos) let ptrs = firstPtr : evalState (repeatUntilEmpty getUWord) remainingPtrs mapM (getVerbTable parser) ptrs getVerbTable parser addr = do pos <- getPos if (pos /= addr) then contigError addr pos else do table <- getTable getUByte parser skipZeroes return table contigError addr pos = error ("Grammar table not contiguous: next pointer is 0x" ++ showHex addr (", but expected 0x" ++ showHex pos "")) {---------------} skipZeroes = do x <- peekUByte when (x == 0) (getUByte >> skipZeroes) {---------------} {- (parseActionCount,parseCount,parsePrepType, parseVerbTableBase,parseVerbDataBase, parseActionTableBase,parsePreactTableBase, parsePrepTableBase,parsePrepTableEnd) = findParseTableBounds parserType findParseTableBounds Infocom6Grammar = let actionTableBase = wordAt (hdrObjectTableAddr-4) preactTableBase = wordAt (hdrObjectTableAddr-2) dictData = [(b!!0 * 256 + b!!1, last b) | (_,b) <- dictEntries] -- (parse_index,flags) pairs dictVerbs = filter isDictVerb dictData isDictVerb (parseIndex,flags) = testBit flags 0 && not (testBit flags 7) && parseIndex /= 0 && parseIndex < actionTableBase vbase = minimum (map fst dictVerbs) vend = maximum (map fst dictVerbs) + 8 verbParseEntries = map (\(parseIndex,_) -> take 4 (wordsFrom parseIndex)) dictVerbs area2base = minimum (concatMap (drop 2) verbParseEntries) area2end1 = maximum (map (!! 2) verbParseEntries) area2end2 = maximum (map (!! 3) verbParseEntries) area2end = max (area2end1 + 2 + 6 * wordAt area2end1) (area2end2 + 2 + 10 * wordAt area2end2) in ((preactTableBase - actionTableBase) `div` 2, undefined, undefined, vbase, area2base, actionTableBase, preactTableBase, area2end, area2end) -} {---------------} -- Dictionary data DictWordTag = DictVerb Int | DictPrep Int | DictNoun | DictPlural | DictMeta | DictSpecial Int | DictAdj Int | DictDir Int | DictBytes [Int] | DictMaybeTruncated deriving (Show,Eq) kbdInputCodes :: [Int] dictionary :: Array Int (String,String,[DictWordTag]) (kbdInputCodes,dictionary,numEntries,entryBaseAddr,entryLength) = evalFrom hdrDictionaryAddr parseDictionary dictWordByteCount = if ver [1..3] then 4 else 6 parseDictionary = do kbdInputCodes <- getTable getUByte getUByte entryLength <- getUByte numEntries <- getUWord entryBaseAddr <- getPos entries <- replicateM numEntries (parseDictionaryWord entryLength) return (kbdInputCodes, listArray (0,numEntries-1) entries, numEntries, entryBaseAddr, entryLength) parseDictionaryWord entryLength = do encodedWord <- getBytes dictWordByteCount let chars = evalState getZChars encodedWord word = zCharsToUnicode chars truncated = if maybeTruncated chars then [DictMaybeTruncated] else [] bytes <- getBytes (entryLength - dictWordByteCount) let tags = truncated ++ evalState dictDataParser bytes return (mungeDictWord word tags, word, tags) mungeDictWord word tags = word' ++ attr where word' = sfFullDictWord word attr | '/' `elem` word' = "" | DictPlural `elem` tags = "//p" | length word == 1 = "//" | otherwise = "" -- The dictionary word might be truncated even if it ends with -- a 5 (padding value), but it's rare and clutters up the list -- with a bunch of false positives. maybeTruncated x = last x /= 5 dictWordAt :: Int -> String dictWordAt addr = case addrToDictWordIndex addr of Just n -> case dictionary!n of (word,_,_) -> word Nothing -> "invalidDictWord" ++ show addr addrToDictWordIndex addr | addr < entryBaseAddr = Nothing | otherwise = case (addr - entryBaseAddr) `divMod` entryLength of (d,0) -> if d < numEntries then Just d else Nothing (_,_) -> Nothing isDictWord addr = isJust (addrToDictWordIndex addr) {---------------} getInfocomAdjectiveNum :: Int -> Maybe Int getInfocomAdjectiveNum n = case [pos | pos <- range (bounds dictionary), DictAdj n' <- thd3 (dictionary!pos), n == n'] of [] -> Nothing (pos:_) -> Just (entryBaseAddr + pos * entryLength) thd3 (_,_,z) = z {---------------} parseDictGV getPrepNumber = do flags <- getUByte verbNumber <- getUByte prepNumber <- getPrepNumber return $ (if testBit flags 0 then [DictVerb verbNumber] else []) ++ (if testBit flags 3 then [DictPrep prepNumber] else []) ++ (if testBit flags 7 then [DictNoun] else []) ++ (if testBit flags 2 then [DictPlural] else []) ++ (if testBit flags 1 then [DictMeta] else []) parseDictGV1 = parseDictGV getUByte parseDictGV2 = parseDictGV (getUByte >> return 0) -- FIXME: Sherlock has only two bytes per dict entry, instead -- of three. What's up with that? parseDictInfocom = do flags <- getUByte -- -- Determining the contents of the next two bytes based on -- the flags is quite nasty, and txd/infodump isn't much -- help. It seems to work like this: the flag bits are -- checked in a certain weird order (2,3,6,7,4,5). The first -- one it finds set gets the value from the first byte, and -- the second from the second byte. Except that if the two -- low bits of the flag byte are nonzero, they override the -- default order, causing a certain bit to be checked first. -- If there's a third set bit, it apparently doesn't get a -- byte. But there's one case where bit 7 comes before 6, -- when the flag byte is 0xC0, and it looks like in Deadline, -- bits 2 and 7 are in the opposite order. I'm sure there are -- other nasty surprises that I didn't figure out. -- let types = if flags == 0xC0 then [(0,\_->DictNoun),(1,DictVerb)] else (if testBit flags 2 then [(0,DictSpecial)] else []) ++ (if testBit flags 3 then [(0,DictPrep)] else []) ++ (if testBit flags 6 then [(1,DictVerb)] else []) ++ (if testBit flags 7 then [(0,\_->DictNoun)] else []) ++ (if testBit flags 4 then [(3,DictDir)] else []) ++ (if testBit flags 5 then [(2,DictAdj)] else []) types2 = case (flags .&. 3) of -- TXD defines 0 as PREP_FIRST, but it doesn't -- appear to actually pull anything to the front. 0 -> types n -> pullToFront n id types byte1 <- getUByte byte2 <- getUByte return $ zipWith ($) (map snd (take 2 types2)) [byte1,byte2,0] where pullToFront n sofar [] = error "Invalid dictionary flags byte" pullToFront n sofar (x@(k,_) : rest) | n == k = x : sofar rest | otherwise = pullToFront n (sofar . (x:)) rest -- FIXME parseDictInfocomV6 = repeatUntilEmpty getUByte >>= \b -> return [DictBytes b]