Commit 32222b7d authored by Angelov, Georgi's avatar Angelov, Georgi
Browse files

Merge branch 'return_explanations' into 'master'

Indirection off

See merge request !3
parents 7a7d7973 84817cad
This diff is collapsed.
......@@ -313,7 +313,7 @@ manualSynonyms
-- Note: These largely align with definition noise words (links)
--
prefixNoiseWords
= ["a","an","it","it's","one","that","that is","that's","the",
= ["a","an","it","it's","one","our","that","that is","that's","the",
"there's","these","they","this","what's","what is","who","who's"]
--
......@@ -695,10 +695,14 @@ indirectSynonymsTable
("banker", "river"),
("battle scene", "battle"),
("body", "organisation"),
("capital city", "capital"),
("land", "region"),
("title", "status"),
("bread", "money"),
("china", "friend"),
("chessman", "grandmaster"),
("chess player", "grandmaster"),
("chess champion", "grandmaster"),
("distance", "measure"),
("distances", "measures"),
("diver", "sea bird"),
......@@ -727,6 +731,7 @@ indirectSynonymsTable
("king's evil", "disease"),
("maths function", "function"),
("material", "cloth"),
("piece of music", "music"),
("pm", "prime minister"),
("push off", "leave"),
("six footer", "insect"),
......@@ -2373,6 +2378,7 @@ synList
("12-inch", ["ft","foot"]),
("12 in", ["ft","foot"]),
("12 inch", ["ft","foot"]),
("a", ["an"]),
("a week", ["pw"]),
("a month", ["pm"]),
("a year", ["pa"]),
......@@ -2441,6 +2447,7 @@ synList
("bowler", ["drake"]),
("breakfast cereal", ["muesli"]),
("bribe", ["bung"]),
("bright star", ["nova","supernova"]),
("bridge player", ["n","s","e","w","north","south","east","west"]),
("bridge partner", ["n","s","e","w","north","south","east","west"]),
("brought in", ["earned"]),
......@@ -2459,11 +2466,7 @@ synList
("censor", ["cato"]),
("changing pitch", ["vibrato"]),
("cheddar", ["gorge"]),
("chessman",["alekhine","tal","lasker","kramnik","botvinnik",
"karpov","capablanca","fischer","carlsen","kasparov"]),
("chess champion",["alekhine","tal","lasker","kramnik","botvinnik",
"karpov","capablanca","fischer","carlsen","kasparov"]),
("chess player",["alekhine","tal","lasker","kramnik","botvinnik",
("grandmaster",["alekhine","tal","lasker","kramnik","botvinnik",
"karpov","capablanca","fischer","carlsen","kasparov"]),
("cloth",["barathea","barkcloth","brilliantine",
"capilene","charmeuse","charvet","chiengora","cloque",
......@@ -2490,8 +2493,9 @@ synList
("court case", ["trial"]),
("cow", ["chasten"]),
("chemical", ["nitre"]),
("chess grandmaster", ["tal"]),
("chess piece", ["pawn"]),
("chess federation", ["fide"]),
("chess organisation", ["fide"]),
("chewed", ["bit"]),
("chinese", ["han"]),
("chip", ["spall"]),
......@@ -2610,6 +2614,7 @@ synList
("fascist", ["duce"]),
("father christmas", ["santa"]),
("fell", ["axe"]),
("fiddle", ["finger"]),
("fielder", ["cover","slip","point","gully"]),
("fielders", ["covers","slips","points","gullies"]),
("fillet", ["bone","debone"]),
......@@ -2902,8 +2907,11 @@ synList
("sober", ["tt"]),
("social worker", ["ant","bee"]),
("software mogul", ["billgates"]),
("some", ["little"]),
("spicy dish", ["balti","phal","vindaloo"]),
("spirit", ["gin","rum","vodka"]),
("splash out", ["spend"]),
("spread", ["passon"]),
("spy", ["agent","mole","m","q"]),
("stableman", ["lad"]),
("stablewoman", ["lass"]),
......@@ -3385,6 +3393,7 @@ homophonesTable
["rote", "wrote"],
["rough", "ruff"],
["rouse", "rows"],
["rondo", "rondeau"],
["rung", "wrung"],
["rye", "wry"],
["saver", "savour"],
......@@ -3882,7 +3891,6 @@ allAbbreviations
("cheers", ["ta"]),
("chemist", ["mps"]),
("cherished", ["pet"]),
("chess grandmaster", ["tal"]),
("chi", ["x"]),
("chief", ["ch","cid"]),
("chief executive", ["dg"]),
......@@ -4264,7 +4272,7 @@ allAbbreviations
("frequency", ["f"]),
("frequently", ["fr"]),
("friar", ["tuck"]),
("friday", ["f","man"]),
("friday", ["f","fr","man"]),
("from", ["ex","de"]),
("full moon", ["o"]),
("function", ["cot","asin","acos","cos","log","tan","sin"]),
......@@ -4852,6 +4860,7 @@ allAbbreviations
("oriental", ["e","ine"]),
("ortho ", ["o"]),
("otorhinolaryngology", ["ent"]),
("other ranks", ["or"]),
("ought", ["o"]),
("ounce", ["oz"]),
("our era", ["ad"]),
......@@ -5275,6 +5284,7 @@ allAbbreviations
("sun", ["s","ra"]),
("sun god", ["ra"]),
("sun god", ["sol"]),
("sunday", ["sun"]),
("sunday school", ["ss"]),
("superior", ["u","up"]),
("support", ["bra","leg"]),
......@@ -5350,6 +5360,7 @@ allAbbreviations
("three times a day", ["tid"]),
("thrice", ["tri","ter"]),
("through", ["per"]),
("thursday", ["th","thu"]),
("thus", ["sic","so"]),
("time", ["t","sec","mo"]),
("times", ["by","x"]),
......@@ -5389,7 +5400,7 @@ allAbbreviations
("trouble", ["ail","ado"]),
("troy", ["t"]),
("tube", ["u"]),
("tuesday", ["t"]),
("tuesday", ["t","tue"]),
("tune", ["air"]),
("tungsten", ["w"]),
("turkey", ["tr"]),
......@@ -5504,7 +5515,7 @@ allAbbreviations
("way of working", ["mo"]),
("wc", ["lav"]),
("weak", ["w"]),
("wednesday", ["w"]),
("wednesday", ["w","wed"]),
("week", ["w"]),
("weight", ["g","oz","ton","gr","st","ct","wt"]),
("well", ["so"]),
......
......@@ -344,7 +344,7 @@ evaluate ps pCache clue@(clueText, n) answer evalCacheOn
-- i.e. TRIBE. With these in, we can't solve for UNKEMPT.
--
evalSubtextB t matchType matcherM matcherC generator
-- | True = rawTextMatches
| True = rawTextMatches
| expander == UseTextSyns
= rawTextMatches
| isSynonymTree t || isHyponymTree t
......
......@@ -69,8 +69,9 @@ removeWords' ws ind1 ind2
-- Then remove any ind1 words from both the candidate indicator (ws) and
-- ind2 words. If there is nothing left we've found an indicator, e.g.
-- "back" or "is back" for a reversal. Otherwise if what's
-- left (ws') appears in ind2 then we've also succeeded, e.g. "is looking
-- "back", "wiped head" etc.
-- left (ws') appears verbatim in ind2 or if every word left appears in ind2
-- then we've also succeeded. The latter accounts for compound indicators
-- like "form moving", both of which are separately in ind2.
-- Note: all arguments have been stemmed by this point.
--
......@@ -83,7 +84,8 @@ isIndicator noise ind1 ind2 ws
tryInd1 ws'
= tryInd2 (removeWords' ws' ind1 ind2)
tryInd2 (ws', ind2')
= null ws' || elem ws' ind2'
= null ws' || elem ws' ind2' ||
and [elem [w] ind2' | w <- ws']
stemInd1 ind1 noise
= removeWords noise (stemAll ind1)
......
......@@ -54,6 +54,7 @@ defInd1
"are",
"as",
"at",
"available",
"be",
"been",
"being",
......@@ -87,6 +88,7 @@ defInd1
"took",
"was",
"when",
"where",
"who",
"who's",
"with",
......@@ -246,6 +248,7 @@ anagramInd2
"agile",
"agitate",
"ail",
"alarm",
"alien",
"all over the place",
"alter",
......@@ -529,6 +532,7 @@ anagramInd2
"dancing",
"dash",
"dashing",
"daunting",
"deal",
"debacle",
"debauch",
......@@ -610,6 +614,7 @@ anagramInd2
"dislodge",
"dismantle",
"dismantled",
"dismay",
"dismember",
"disobey",
"disorder",
......@@ -624,6 +629,7 @@ anagramInd2
"dispose",
"disposition",
"dispute",
"disquiet",
"disrepair",
"disrupt",
"disrupted",
......@@ -638,13 +644,11 @@ anagramInd2
"distract",
"distraught",
"distress",
"distressed",
"distribute",
"distributed",
"distribution",
"disturb",
"disturbance",
"after disturbance",
"disturbed",
"dither",
"diverse",
......@@ -868,6 +872,7 @@ anagramInd2
"harassed",
"harm",
"harmed",
"harrow",
"harry",
"harsh",
"hash",
......@@ -891,6 +896,7 @@ anagramInd2
"horribly",
"horrid",
"horrific",
"horrify",
"horse around",
"horseplay",
"hotch-potch",
......@@ -1213,6 +1219,7 @@ anagramInd2
"not right",
"nuts",
"nutty",
"object",
"objectionable",
"oblique",
"obscene",
......@@ -1717,6 +1724,7 @@ anagramInd2
"temperamental",
"tempered",
"terrible",
"terrify",
"theatrical",
"thrash",
"thrashing about",
......@@ -2038,6 +2046,7 @@ oddsInd2
= ["50-50",
"alternately",
"at odds",
"betweentimes",
"brokenly",
"disconnectedly",
"discontinuously",
......@@ -2045,6 +2054,7 @@ oddsInd2
"even gone",
"even missing",
"even without",
"every now and then",
"every other",
"every so often",
"fitfully",
......@@ -2054,12 +2064,14 @@ oddsInd2
"irregularly",
"no even",
"not even",
"now and then",
"occasionally",
"oddly",
"randomly",
"regularly",
"skipping",
"skippingly",
"sometimes",
"sporadically",
"uneven",
"willy-nilly",
......@@ -2074,10 +2086,12 @@ evensInd1
evensInd2
= ["50-50",
"alternately",
"betweentimes",
"brokenly",
"disconnectedly",
"discontinuously",
"even",
"every now and then",
"every other",
"every so often",
"fitfully",
......@@ -2087,6 +2101,7 @@ evensInd2
"irregularly",
"no odds",
"not odd",
"now and then",
"occasionally",
"odd go",
"odd missing",
......@@ -2096,6 +2111,7 @@ evensInd2
"removed odds",
"skipping",
"skippingly",
"sometimes",
"sporadically",
"willy-nilly",
"without odd"]
......@@ -2123,6 +2139,7 @@ firstLettersNoise
"letter",
"of",
"on",
"only",
"that",
"that's",
"the",
......@@ -2255,6 +2272,7 @@ firstLetterWords
"launch",
"lessen",
"little",
"lot",
"mainly",
"minimally",
"mostly",
......@@ -2663,6 +2681,7 @@ middleLetterWords
"lie",
"limiting",
"lining",
"little",
"load",
"locate",
"lock",
......@@ -2984,6 +3003,8 @@ duplicateInd2
homophoneNoise
= ["according",
"about",
"am",
"are",
"as",
"be",
"been",
......@@ -2993,6 +3014,8 @@ homophoneNoise
"come",
"for",
"from",
"i",
"i'm",
"in",
"is",
"it",
......@@ -3013,10 +3036,13 @@ homophoneNoise
"to",
"the",
"they",
"they're",
"up",
"way",
"we",
"you"]
"we're",
"you",
"you're"]
homophoneInd1
= ["announced",
......@@ -4322,6 +4348,7 @@ subtractionWords
"rip",
"sack",
"sacrifice",
"save",
"scalp",
"scavenge",
"scoop",
......@@ -4422,6 +4449,7 @@ subtractionInd2R2
charadeNoise
= ["a",
"an",
"appear",
"as",
"be",
"been",
......
......@@ -368,7 +368,7 @@ getIndicatedCharadeArg1Trees pCache index
= applyFilter p pCache index
where
p (_, (Charade ind _, _))
| ind == noInd = False
| ind == noInd = True
| otherwise = elem indStr afterWords
where
indStr = getString pCache ind
......
......@@ -156,7 +156,7 @@ showPrunedParseWithAnswer parseNum clue@(text, n) answer
-- tree that gives the best explanation, but any will do. This also serves
-- to prune the parses, although not substantially.
--
-- To get the best explanatin when the parse trees are unequal we promote some
-- To get the best explanation when the parse trees are unequal we promote some
-- parses based on inspection of the definition and definition indicator. To
-- avoid a bad parse leapfrogging a good one the promotion function
-- (defProperties) also uses the quality score. This means we compute this
......@@ -217,7 +217,8 @@ parses clue@(s, n) answer pruner
prune :: [(Int, Parse)] -> [(Int, Parse)]
prune qps
| null answer = qps
| otherwise = map pickOne (groupBy g (sortBy f qps))
| otherwise = -- concat (groupBy g (sortBy f qps))
map pickOne (groupBy g (sortBy f qps))
where
g (_, p) (_, p') = sameParse pCache p p'
f (_, p) (_, p') = compareParses pCache p p'
......@@ -1224,8 +1225,9 @@ compareParses pCache (i, _, t) (i', _, t')
= compareTrees' t1 t2 t1' t2'
compareTrees (_, (Subtraction _ t1 t2, _)) (_, (Subtraction _ t1' t2', _))
= compareTrees' t1 t2 t1' t2'
compareTrees (_, (Charade _ ts, _)) (_, (Charade _ ts', _))
= compareTrees'' ts ts'
compareTrees (_, (Charade i ts, _)) (_, (Charade i' ts', _))
| i == i' = compareTrees'' ts ts'
| otherwise = compare i i'
compareTrees'' (t : ts) (t' : ts')
| compareTrees t t' == LT = LT
......
module Solver where
import State
import Debug.Trace
import Data.List
import Data.Char
......@@ -22,8 +21,8 @@ import Stemmer
--import Benchmarks.Cryptonite
import Benchmarks.Everyman
--import Benchmarks.Rufus
--import Benchmarks.Telegraph100000to109999
import Benchmarks.Telegraph0to10000
import Benchmarks.Telegraph100000to109999
-- import Benchmarks.Telegraph0to10000
import Benchmarks.Times0to9999
import ShowFunctions
......
......@@ -435,37 +435,66 @@ endLettersGen s (m, n)
-- minRes is the minimum number of letters that can be generated
-- by the given subtext function, e.g. 1 for first, 2 for ends...
--
spanWords :: (String -> (Int, Int) -> [(Int, String)]) ->
genByMapping :: (String -> (Int, Int) -> [(Int, String)]) ->
Int -> [String] -> (Int, Int) ->
[String]
spanWords f minRes ws con
= map concat (spanWords' ws lenws con)
genByMapping f minRes ws con
= map concat (genByMapping' ws lenws con)
where
lenws = length ws
spanWords' [] _ _
genByMapping' [] _ _
= [[]]
spanWords' (w : ws) len (m, n)
genByMapping' (w : ws) len (m, n)
| len == 1
= map (\(k, s) -> [s]) (f' w m n)
| otherwise
= [s : rest |
(k, s) <- f' w 1 (n - reserve),
rest <- spanWords' ws (len - 1) (m - k, n - k)]
rest <- genByMapping' ws (len - 1) (m - k, n - k)]
where
reserve = (len - 1) * minRes
f' s m n = f s (max 1 m, max 0 n)
genFirstLetters
= spanWords firstLettersGen 1
genByConcat f [w] bs
= []
genByConcat f ws bs
= f left (length left) cmid lmid right (length right) bs
where
(left, mid, right) = extractMiddle ws
cmid = concat mid
lmid = length cmid
firstLettersMap left lenL mid lenM right lenR (m, n)
| sumL < n
= [leftmid ++ take k right |
k <- [max 1 (m - sumL) .. n - sumL]]
| otherwise = []
where
sumL = lenL + lenM
leftmid = left ++ mid
-- Need: [(k,k') | k <- [1..min len1 (m-1)], k' <- [m-k..min (n-k) len2]]
middleLettersMap left lenL mid lenM right lenR (m, n)
| lenM <= n - 2
= [take k left ++ mid ++ take k' right |
k <- [1 .. min lenL (m' - 1)],
k' <- [m' - k .. min (n' - k) lenR]]
| otherwise = []
where
(m', n') = (m - lenM, n - lenM)
genFirstLetters ws bs
= genByMapping firstLettersGen 1 ws bs ++
genByConcat firstLettersMap ws bs
genLastLetters
= spanWords lastLettersGen 1
= genByMapping lastLettersGen 1
genMiddleLetters
= spanWords middleLettersGen 1
genMiddleLetters ws bs
= genByMapping middleLettersGen 1 ws bs
genEndLetters
= spanWords endLettersGen 2
= genByMapping endLettersGen 2
middleLetters :: String -> [String]
middleLetters s
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment