#!/usr/bin/env python
"""
    The Electric Puppet Crossword Generator

    By Mark V.

    Type "python eptcross.py --help" for basic help, or view the
    documentation at http://eptcomic.com/eptcross.htm

    Please send any questions, comments, bug reports, or patches
    to markv@eptcomic.com

License:

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import random, re, sys
from optparse import OptionParser

class Clue(object):
    """A slot for a crossword clue to be filled in.

    Attributes:
        horizontal: true if the slot runs along a row (printed as "A"),
            false if it runs down a column (printed as "D").
        row, col: grid coordinates of the first cell of the slot.
        len: number of cells the slot spans.
    """
    def __init__(self, horizontal, row, col, len_):
        self.horizontal = horizontal
        self.row = row
        self.col = col
        self.len = len_

    @property
    def cells(self):
        """Return a list of the (row, col) coordinates spanned by this clue."""
        # range() instead of xrange() so this runs on Python 2 and 3 alike;
        # the resulting list is identical.
        if self.horizontal:
            return [(self.row, self.col + i) for i in range(self.len)]
        return [(self.row + i, self.col) for i in range(self.len)]

    def __len__(self):
        return self.len

    def pattern(self, grid):
        """Return the current contents of this clue's cells in grid as a
        single string, one character per cell."""
        return "".join(grid[row][col] for (row, col) in self.cells)

    def __repr__(self):
        return str(self)

    def __str__(self):
        # Format: direction letter, (row,col) of the first cell, length.
        direction = "A" if self.horizontal else "D"
        return "%s(%d,%d)%d" % (direction, self.row, self.col, self.len)

class Crossword(object):
    """A (possibly partially filled) crossword plus its remaining search space.

    Instances are treated as immutable search states: apply_choice()
    returns a new Crossword rather than modifying this one.
    """
    def __init__(self, grid, clues, clues2choices, grid2clues):
        # Crossword matrix (list of rows, each a list of cells).
        #  "." = empty
        #  None = wall
        #  All other characters are filled-in boxes
        self.grid = grid
        # Crossword clues to be filled in.
        self.clues = clues
        # Words available in the search space for each clue
        #   {clue => [words...]}
        self.clues2choices = clues2choices
        # Which grid elements are covered by which clues: a matrix with
        # the same shape as grid, where grid2clues[i][j] is the list of
        # clues covering cell (i,j).
        self.grid2clues = grid2clues

    def gridstring(self):
        """Return the grid as text, one line per row, walls shown as "@"."""
        return "\n".join("".join(cell or "@" for cell in row)
                         for row in self.grid) + "\n"

    def write(self, out = sys.stdout, clues = False):
        """Write the grid to out; if clues is true, follow it with one
        "clue: pattern" line per clue."""
        out.write(self.gridstring())
        if clues:
            for clue in self.clues:
                out.write("%s: %s\n" % (clue, clue.pattern(self.grid)))

    def __str__(self):
        rule = "-" * len(self.grid[0]) + "\n"
        parts = [rule, self.gridstring(), rule]
        for clue in self.clues:
            # The format has three fields: clue, number of remaining
            # choices, and the clue's current pattern.  (The pattern was
            # previously missing, which made every str() call raise
            # TypeError.)
            parts.append("%s [%s] %s\n" % (
                str(clue), len(self.clues2choices[clue]),
                clue.pattern(self.grid)))
        return "".join(parts)

    def __repr__(self):
        return str(self)

    def dump(self):
        """Return str(self) followed by the clues covering each cell."""
        parts = [str(self)]
        for (i, row) in enumerate(self.grid2clues):
            for (j, clues) in enumerate(row):
                parts.append("(%d,%d): %s\n" % (
                    i, j, " ".join(map(str, clues))))
        return "".join(parts)

    def next_clue(self, used):
        """Return the clue not in used that has the fewest remaining
        choices, or None if every clue is in used.

        Ties go to the clue that comes first in self.clues.  Clue objects
        themselves are never compared, so the result does not depend on
        object addresses and a seeded run stays reproducible.
        """
        best = None
        best_count = None
        for clue in self.clues:
            if clue in used:
                continue
            count = len(self.clues2choices[clue])
            if best is None or count < best_count:
                best = clue
                best_count = count
        return best

    def get_choices(self, clue):
        """Return a freshly shuffled copy of the words available for clue."""
        retval = self.clues2choices[clue][:]
        random.shuffle(retval)
        return retval

    def apply_choice(self, clue, choice):
        """Return a new Crossword with the word choice written into clue.

        The choice lists of every clue sharing a cell with clue (including
        clue itself) are narrowed to the words matching the new grid.
        """
        # fill in letters on a copy of the grid
        grid = [row[:] for row in self.grid]
        touched = set()
        for ((i, j), letter) in zip(clue.cells, choice):
            grid[i][j] = letter
            touched.update(self.grid2clues[i][j])

        # update choices for intersected clues
        clues2choices = {}
        for (other, words) in self.clues2choices.items():
            if other in touched:
                # An empty cell is "." in the pattern, which doubles as
                # the regex wildcard.
                matcher = re.compile("^" + other.pattern(grid) + "$")
                clues2choices[other] = [word for word in words
                                        if matcher.search(word) is not None]
            else:
                clues2choices[other] = words[:]

        return Crossword(grid, self.clues, clues2choices, self.grid2clues)

    def is_valid(self):
        """Return True if every clue still has at least one choice."""
        return all((len(words) > 0) for words in self.clues2choices.values())

def backtrack(state, max_nodes = None, debug = False):
    """Return the solution to a Crossword (state) as a filled-in
    Crossword object, or None if no solution is possible.  If
    max_nodes is exceeded, print an error message and return the
    current partial solution.

    A state with no clues at all is returned unchanged, since there is
    nothing left to fill in.

    state must provide: clues, next_clue(used), get_choices(clue),
    apply_choice(clue, choice) and is_valid().  If debug is true,
    progress messages are printed to standard output.

    Solution is by exhaustive search of the solution space using
    the backtrack algorithm (c.f., "Combinatorial Algorithms: Theory
    and Practice", Reingold, Nievergelt and Deo 1977, pg. 109)
    """

    if debug:
        print("Initializing backtrack")

    depth = len(state.clues)
    if depth == 0:
        # No slots to fill: the grid as given is already complete.
        return state

    # Order that the clues are filled in
    path = [None] * depth
    # State of the crossword at each path position
    states = [None] * depth
    # Remaining choices at each path position
    choices = [None] * depth

    # Number of nodes visited
    count = 0

    # Initialize first node.  The choice list is copied so that popping
    # from it below never mutates a list owned by the state object.
    path[0] = state.next_clue([])
    states[0] = state
    choices[0] = list(states[0].get_choices(path[0]))

    if debug:
        print("Initialized:")
        print("  First clue = %s" % path[0])
        print("  With %d choices" % len(choices[0]))
        print("  On:")
        print(states[0])
        print(">>>>>>>>>>>>>")

    # current depth
    k = 0

    if debug:
        print("Starting main loop")

    while k > -1:
        while (k < depth) and choices[k]:
            # pop next choice (from the end, as before)
            choice = choices[k].pop()
            count += 1
            if max_nodes and (count > max_nodes):
                sys.stderr.write("Maximum iterations (%d) exceded\n" %
                                 max_nodes)
                return states[k]

            state = states[k].apply_choice(path[k], choice)

            if debug:
                print(state)

            if state.is_valid():
                if debug:
                    print("State is valid")
                k += 1
                if k == depth:
                    if debug:
                        print("Solved!")
                    # Solved!
                    return state
                path[k] = state.next_clue(path[:k])
                states[k] = state
                choices[k] = list(states[k].get_choices(path[k]))
                if debug:
                    print("Descending to %s with %d choices" % (
                        path[k], len(choices[k])))

        # backtrack
        k -= 1
        if debug and k > -1:
            # Skipped once k reaches -1: there is no state left to go
            # back to, and states[-1] would show an unrelated slot.
            print("Backtracking to")
            print(states[k])

    sys.stderr.write("Solution space exhausted\n")
    return None

def load_grid(fname):
    """Read a crossword grid from the text file fname.

    Every character other than a letter, "." or "@" is discarded, and
    lines left empty by that are skipped.  Letters are lower-cased and
    "@" becomes None (a wall).

    Returns the grid as a list of rows, each a list of cells.

    Raises ValueError if the file contains no grid rows or if the rows
    are not all the same length.
    """
    rows = []
    # try/finally (rather than a with-block) so the handle is always
    # closed without requiring a newer interpreter than the rest of
    # the file does.
    grid_file = open(fname)
    try:
        for line in grid_file:
            cells = re.sub(r"[^A-Za-z\.@]", "", line).lower()
            if cells:
                rows.append([None if c == "@" else c for c in cells])
    finally:
        grid_file.close()

    # Explicit checks instead of assert, which is stripped under -O.
    if not rows:
        raise ValueError("No grid rows found in %r" % (fname,))
    width = len(rows[0])
    for (number, row) in enumerate(rows):
        if len(row) != width:
            raise ValueError(
                "Grid row %d of %r has %d cells; expected %d" % (
                    number + 1, fname, len(row), width))
    return rows

def load_lexicon(fnames):
    """Load the word lists named in fnames into one lexicon.

    Each line is stripped and lower-cased; lines that are not made up
    solely of the letters a-z after that are skipped.

    Returns a dict mapping word length to the set of words of that
    length, merged across all files.
    """
    words = {}
    allowed = re.compile("^[a-z]+$")
    for fname in fnames:
        # try/finally so each file is closed even if reading fails.
        word_file = open(fname)
        try:
            for line in word_file:
                word = line.strip().lower()
                if allowed.search(word) is None:
                    continue
                words.setdefault(len(word), set()).add(word)
        finally:
            word_file.close()
    return words

def make_crossword(grid, lexicon):
    """Build the initial Crossword search state for grid.

    grid is a list of rows of cells ("." empty, None wall, otherwise a
    letter); lexicon maps word length to a collection of words of that
    length.  Each clue found in the grid starts with every lexicon word
    of its length that matches the letters already present.  A clue
    whose length has no entry in the lexicon gets an empty choice list
    rather than raising KeyError.
    """
    clues = find_clues(grid)

    clues2choices = {}
    for clue in clues:
        # Sorted so the starting order does not depend on set iteration
        # order, which keeps seeded runs reproducible.
        candidates = sorted(lexicon.get(len(clue), ()))
        pattern = clue.pattern(grid)
        if all((c == ".") for c in pattern):
            # Completely empty slot: every word of this length fits.
            clues2choices[clue] = candidates
        else:
            # "." in the pattern doubles as the regex wildcard.
            matcher = re.compile("^" + pattern + "$")
            clues2choices[clue] = [word for word in candidates
                                   if matcher.search(word) is not None]

    # One list of covering clues per grid cell.
    grid2clues = [[[] for _ in range(len(grid[0]))]
                  for _ in range(len(grid))]
    for clue in clues:
        for (i, j) in clue.cells:
            grid2clues[i][j].append(clue)

    return Crossword(grid, clues, clues2choices, grid2clues)

def _clue_runs(line):
    """Yield (start, length) for each maximal run of non-wall (non-None)
    cells in line, in order of position."""
    start = None
    for (pos, cell) in enumerate(line):
        if cell is None:
            if start is not None:
                yield (start, pos - start)
                start = None
        elif start is None:
            start = pos
    if start is not None:
        # Run extends to the end of the line.
        yield (start, len(line) - start)

def find_clues(grid):
    """Return the list of Clue slots in grid.

    grid is a rectangular list of rows in which None marks a wall.
    Every maximal run of non-wall cells along a row becomes a horizontal
    clue and every such run down a column becomes a vertical clue.
    Horizontal clues come first (row by row, left to right), followed by
    vertical clues (column by column, top to bottom).

    Raises IndexError if grid has no rows or no columns.
    """
    rows = len(grid)
    cols = len(grid[0])
    if cols == 0:
        raise IndexError("Grid has no columns")

    clues = []

    # Find horizontal clues
    for row in range(rows):
        for (start, length) in _clue_runs(grid[row][:cols]):
            clues.append(Clue(True, row, start, length))

    # Find vertical clues
    for col in range(cols):
        column = [grid[row][col] for row in range(rows)]
        for (start, length) in _clue_runs(column):
            clues.append(Clue(False, start, col, length))

    return clues

# Command-line entry point: parse options, load the grid and the
# dictionaries, run the backtracking search and print the result.
if __name__ == "__main__":
    parser = OptionParser(
        usage = "usage: %prog [options] grid.txt",
        description = ("%prog generates a crossword by filling in a grid "
                       "with words from a user-provided or system-default "
                       "dictionary.  Input and output grids are formatted "
                       "as text: each line gives a row of the crossword, "
                       "'.' indicates empty spaces, '@' indicates black "
                       "squares, and all other characters indicate filled "
                       "squares (whitespace is ignored). "
                       #  Omitting example until we can figure out how
                       #  to selectively disable word-wrapping
                       #"E.g.:\n"
                       #".....@...\n"
                       #"frank@...\n"
                       #"..@......\n"
                       #"..@......\n"
                       "Any words in a "
                       "partially-filled input grid must be present in "
                       "the dictionary (this can be ensured by using the "
                       "--dictionary option twice to concatenate the "
                       "system dictionary with a custom dictionary). "
                       "All rows must be the same length.\n"
                       "Please send any questions, comments, or patches "
                       "to markv@eptcomic.com")
        )
    parser.add_option(
        "-D", "--dictionary", dest = "dictionary",
        metavar = "FILENAME",
        default = [],
        action = "append",
        help = ("Load words from FILENAME.  You may give this option "
                "multiple times to concatenate multiple dictionaries."))
    parser.add_option(
        "-s", "--seed", dest = "seed",
        metavar = "SEED",
        default = None,
        type = "int",
        help = ("Seed the random number generator with SEED.  A given "
        "SEED will always generate the same solution for a given problem. "
        "If SEED is not given, the default system seeding will be used."))
    parser.add_option(
        "-d", "--debug", dest = "debug",
        action = "store_true",
        default = False,
        help = "Turn on verbose debugging messages.")
    parser.add_option(
        "-n", "--maxnodes", dest = "maxnodes",
        metavar = "NODES",
        default = 100000,
        type = "int",
        help = ("Explore at most NODES nodes of the search tree before "
                "giving up.  Larger values will solve more puzzles, but "
                "will increase the maximum possible run time. Set NODES "
                "to 0 for unbounded searches."))

    parser.add_option(
        "-c", "--showclues", dest = "showclues",
        action = "store_true",
        default = False,
        help = ("Print a clue list with the solution.  (Default is to "
                "only print the solved grid)."))

    (options, args) = parser.parse_args()

    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    # Compare against None rather than testing truthiness: 0 is a
    # legitimate seed and must not fall through to system seeding.
    if options.seed is not None:
        random.seed(options.seed)
    else:
        random.seed()

    grid = load_grid(args[0])

    if len(options.dictionary) < 1:
        lexicon = load_lexicon(["/usr/share/dict/words"])
    else:
        lexicon = load_lexicon(options.dictionary)

    crossword = make_crossword(grid, lexicon)

    if options.debug:
        print("lexicon:")
        for (key, val) in lexicon.items():
            print("%s %s" % (key, len(val)))

        print("crossword:")
        print(crossword.dump())

        print("Starting backtrack search...")

    # --maxnodes 0 (or any value below 1) means an unbounded search.
    maxnodes = options.maxnodes
    if maxnodes < 1:
        maxnodes = None

    solution = backtrack(crossword,
                         max_nodes = maxnodes,
                         debug = options.debug)

    if options.debug:
        print("Solution:")

    if solution:
        solution.write(clues = options.showclues)
    else:
        print("Unsolvable!")

