#!/usr/bin/perl
# puz-decode - conver 1 PUZ-format crossword from file or stdin to HTML or PS.
# Update: 2010-09-16 05:16:40 CDT (Sep Thu) added rendering for circled cells
# \ Had to overhaul data structures to store info in each grid cell.
# Update: 2010-09-16 02:19:22 CDT (Sep Thu) added extension parsing
# | See the following for more info:
# \   http://code.google.com/p/puz/wiki/FileFormat
# Update: 2007-11-11 22:01:30 CST (Nov Sun) assume 3 lines of title/auth/copy
# Update: 2007-06-07 23:45:48 CDT (Jun Thu) 1181277948 - erlkonig
# |  renamed to reflect commandline puz-[tab] use, harmful extension removed.
# |  modified arg parse section to support reading from stdin
# |  modified header parsing to work with 3 variants of copyright symbol
# |    - todo: add support for Unicode variant, or obviate this check entirely.
# |  modified numbered cell determination so words require at least 2 cells
# \    - thus making the litsoft Cryptic example finally work.  
#
# TODO: fix dependence on parsing (c) sign in inumerable forms (UTF-8, JIS..?)
# TODO: process 4-digit decryption codes, make encrypted answers viewable.
# TODO: allow interaction in HTML (would minimally require JavaScript).
# TODO: - if interaction supported (in a form), allow for .PUZ output.

use strict;
use POSIX;

# source: http://loxosceles.org/antics/crossword_display/
#EXAMPLES
#    # print the puzzle as a PDF
#    puz-decode -P foo.puz | ps2pdf - foo.pdf
#
#    # create a HTML display of the puzzle
#    puz-decode foo.puz > foo.html

my ($psmode, $answers, $key, $file, $data);

sub Syntax ()
{
	return ("USAGE: $0 [-k<key> | -a]... [<file.puz>] > <puzzle.html>\n"
			."       $0 [-k<key> | -a]... -P [<file.puz>] > <puzzle.ps>\n"
			." -k<key>   - use scramble <key> to unlock answers, ex: -k1234\n"
			." -a        - show answers\n"
			." -P        - output in PostScript rather than HTML\n"
			."reads from standard input if no file given\n");
}

foreach my $arg (@ARGV) {
	if($arg eq '-h') { Syntax; exit(0); }
	elsif($arg eq '-P')            { $psmode  = 1; }
	elsif($arg eq '-a')            { $answers = 1; }
	elsif($arg =~ m/-k(\d\d\d\d)/) { $key = $1; }
	else { $file = $arg; }
}

if($file) {
	die "file $file does not exist\n" unless -e $file;
	die "file $file is not readable. Are you sure it's a .puz file?\n" 
		unless -f _ and -r _;
	if(open(my $fh, $file)) {
		local $/;
		$data = <$fh>;
		close $fh;
	}
} else {
	local $/;
	$data  = <STDIN>;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - have data 
# fill in data structures from the file

# get the size of the grid        str    offs  len   (offset == 44)
my ($w, $h) = unpack("CC", substr($data, 0x2C, 2));

# The combined grid of dicts for each cell
my $grid = [];
for my $x (0 .. $w-1) {     # $x = X distance from left of grid
	$grid->[$x] = [];
	for my $y (0 .. $h-1) { # $y = Y distance from top of grid
		$grid->[$x][$y] = {}
	}
}

# First, the answers (which may be scrambled).
my $scrambled = unpack('s', substr($data, 0x32, 2));
#                                 ==52
my $answer_string = substr($data, 0x34, $w*$h);
for my $y (0 .. $h-1) {
	for my $x (0 .. $w-1) {
		my $answer_char = substr($answer_string, 0, 1, '');
		$grid->[$x][$y]->{'answer'} = $answer_char;
	}
}

# Convert the black/white grid ('.' for black and '-' for white) to booleans:
my $bw_string = substr($data, 0x34+$w*$h, $w*$h);
for my $y (0 .. $h-1) {
	for my $x (0 .. $w-1) {
		$grid->[$x][$y]->{'black'} = substr($bw_string, 0, 1, '') eq '.';
	}
}

my $pending = substr($data, 0x34+$w*$h+$w*$h); 

# the list of clues contains a few lines of footer material, to be
# dealt with later.
$pending =~ s/([^\0]*)\0//;    my $title        = sanitize_text($1);
$pending =~ s/([^\0]*)\0//;    my $author       = sanitize_text($1);
$pending =~ s/([^\0]*)\0//;    my $copyright    = sanitize_text($1);
$pending =~ s/\0\0(.*)//;      my $section_data = $1;
my @clues = map { sanitize_text($_) } split(/\0/, $pending);

my @sections;					# $sections[n]{'title|length|checksum|data'}
my %section_types = ('GRBS' => 'grid of rebus squares each with RTBL ## + 1',
					 'RTBL' => 'rebus solutions, each in form "##:solution;"',
					 'LTIM' => 'timer status as <secs-elasped>:<elapsing>',
					 'GEXT' => 'grid of squares flagged: 0x10 was wrong, 0x20 is wrong, 0x40 given, 0x80 circled',
					 'RUSR' => '<undocumented>');
while(length($section_data)) {
	$section_data =~ s/^(........)//;
	my ($s_type, $s_length, $s_checksum) = unpack("a4vv", $1);
	my %section = ( 'type'     => $s_type,
					'length'   => $s_length,
					'checksum' => $s_checksum );
	if(defined($section_types{$s_type})) {
		$section{'desc'} = $section_types{$s_type};
	}
	$section{'data'} = substr($section_data, 0, $s_length, '');
	substr($section_data, 0, 1, ''); # toss section's final NUL
	push(@sections, \%section);
}

my %rebuses;    				# $rebuses{$number} = $solution
for my $section (@sections) {
	if($section->{'type'} eq 'GEXT') {
		my $data = $section->{'data'};
		my $usage = 0;			# add one per flag used per cell
		for my $y (0 .. $h-1) {
			for my $x (0 .. $w-1) {
				my $cell = $grid->[$x][$y];
				my $flag = unpack('C', (substr($data, 0, 1, '')));
				if($flag & 0x80) { $cell->{'circled'}  = 1; ++$usage; }
				if($flag & 0x40) { $cell->{'given'}    = 1; ++$usage; }
				if($flag & 0x20) { $cell->{'iswrong'}  = 1; ++$usage; }
				if($flag & 0x10) { $cell->{'waswrong'} = 1; ++$usage; }
			}
		}
		$section->{'note'} = $usage . ' total flags set';
	} elsif($section->{'type'} eq 'GRBS') {
		my $data = $section->{'data'};
		for my $y (0 .. $h-1) {
			for my $x (0 .. $w-1) {
				my $cell = $grid->[$x][$y];
				my $rebus_num = unpack('C', (substr($data, 0, 1, '')));
				if($rebus_num > 0) { $cell->{'rebus'} = $rebus_num - 1; }
			}
		}
		if($section->{'checksum'} == 0) {
			$section->{'note'} = 'zero checksum - probably no rebuses present';
		}
	} elsif($section->{'type'} eq 'RTBL') {
		my $data = $section->{'data'};
		my @rebuses = split(/;/, $data);
		my @numbers;
		for my $number_and_solution (@rebuses) {
			my ($number, $solution) = split(/:/, $number_and_solution);
			$rebuses{$number} = $solution;
			push(@numbers, $number);
		}
		$section->{'note'} = sprintf('%d rebuses numbered (%s)', $#numbers,
									 join(' ', @numbers));
	}
}

# it also contains a few lines at the beginning. The last
# uninteresting line starts with a copyright symbol.
# [addendum - unless it's a (c) instead.  Is the "few" a constant? -erlkonig]
$copyright =~ s/\xa9/©/;        # iso-8859-1 (i think...)
$copyright =~ s/[(][Cc][)]/©/;  # "(c)", etc.
$copyright =~ s/^\s*©/©/;
my @header_lines = ($title, $author, $copyright);

my @across; # list of across clues with numbers
my @down; # list of down clues with numbers

# The most pressing issue now is to figure out the numbers and assign
# them to the grid and the clues.

# Black squares will be filled with -1's. We'll have an extra row on
# the top, and column on the left, filled with -1's.

sub cell($$$) {   # grid, $x, $y -- cell or undef
	my ($grid, $x, $y) = @_;
	my $cell = undef;
	if(($x >= 0) && ($y >= 0) && defined($grid->[$x][$y])) {
		$cell = $grid->[$x][$y];
	}
	return $cell;
}

# A white square with  left:black and right:white is starting an across.
# A white square with above:black and below:white is starting a down.
my $c = 1;;
for my $y (0 .. $h-1) {
	for my $x (0 .. $w-1) {
		my ($cell, $left, $right, $above, $below) = (cell($grid, $x,   $y),
													 cell($grid, $x-1, $y),
													 cell($grid, $x+1, $y),
													 cell($grid, $x,   $y-1),
													 cell($grid, $x,   $y+1));
		my $start_across = ( ! $cell->{'black'}
							 && ( ! defined($left)  ||    $left->{'black'})
							 &&     defined($right) && ! $right->{'black'});
		my $start_down   = ( ! $cell->{'black'}
							 && ( ! defined($above) ||   $above->{'black'})
							 &&     defined($below) && ! $below->{'black'});
		if($start_across || $start_down) {
			$cell->{'cluenum'} = $c;
			push(@across, "$c. " . shift(@clues)) if ($start_across);
			push(@down,   "$c. " . shift(@clues)) if ($start_down);
			$c++;
		}
	}
}

if ($psmode) {
  psify_puzzle($grid, \@across, \@down, \@header_lines, \@clues);
} else {
  my $header = join "<br />", @header_lines;
  my $footer = '';
  if($scrambled) {
	  $footer .= 'puzzle is scrambled, code '.$scrambled.".\n";
  }
  if($#sections > 0) {
	  $footer .= "extension sections:\n";
	  for my $section (@sections) {
		  $footer .= sprintf("%s (%d B sum %6s) %s\n",
							 $section->{'type'},
							 $section->{'length'},
							 $section->{'checksum'},
							 $section->{'desc'});
		  if(defined($section->{'note'})) {
			  $footer .= '   (' . $section->{'note'} . ")\n";
		  }
	  }
  }
  htmlify_puzzle($grid, \@across, \@down, $header, $footer);
}

# subs

sub sanitize_text {    # mostly to remove those stupid windows characters
	my ($s) = @_;
	$s =~ s/\341/'/g;
	return $s;
}

sub debug_grid {
	my $twod_array = shift;
	for my $i (0..$#$twod_array){
		print STDERR ">";
		for my $j (0..$#{$twod_array}){
			print STDERR "$twod_array->[$i][$j]  ";
		}
		print STDERR "\n";
	}
	print STDERR "------------------------\n";
}

sub htmlify_puzzle {
	my ($grid, $across, $down, $header, $footer) = @_;
	my $html = <<EIEIO;
<html>
<head>
<title>Crossword Puzzle</title>
<style>
//body { width: 600px }
.grid * td { background-color: white; width: 27px; height: 25px; margin: 0; padding: 0; vertical-align: top; text-align:center; }
.grid * td.black { background-color:black !important; }
.grid * td.circled { background-color:#def !important; }
div.number { font-size: 8px; font-family: sans-serif; color: #666; margin: 0; padding: 0; text-align:left; }
.layout { width:100%; }
.layout * td { vertical-align:top; }
.across { text-align:left; }
.grid { }
.down1, .down2 { text-align:left; }
.footer { clear:both; }
</style>
</head>
<body>

EIEIO
	my $html_grid;
    {
		$html_grid .= ('<table class="grid" '
					   . 'border="1" cellpadding="0" cellspacing="0">'
					   ."\n");
		for my $y (0 .. $h-1) {
			$html_grid .= "<tr>\n";
			for my $x (0 .. $w-1) {
				my $cell = $grid->[$x][$y];
				if ($cell->{'black'}) {
					$html_grid .= ("\t"
								   .'<td class="black">'
								   .'<img src="black.png" alt=""/>'
								   .'</td>'."\n");
				} else {
					my $class = '';
					if(defined($cell->{'circled'})) {
						$class = ' class="circled"';
					}
					$html_grid .= "\t<td$class>";
					my $cluenum_text = (defined($cell->{'cluenum'})
										? $cell->{'cluenum'}
										: '&nbsp;');
					$html_grid .= ('<div class="number">'
								   . $cluenum_text
								   .'</div>');
					my $answer_text = '&nbsp;';
					if(defined($answers) && defined($cell->{'answer'})) {
						$answer_text = $cell->{'answer'};
					}
					$html_grid .= $answer_text . "</td>\n";
				}
			}
			$html_grid .= "</tr>\n";
		}
		$html_grid .= "</table>\n";
	}

		# clues down
    my (@down1, @down2);
    {
		my @d = @$down;
		while(@d) {
			push(@down1, shift(@d));
			unshift(@down2, pop(@d)) if(@d);
		}
	}

    # add header elements
    {
		$html .= '<div class="header">'.$header."</div>\n";

		$html .= '<table class="layout"><tr><td>';

		$html .= ('<div class="across">' . '<h3>Across</h3>'  # across
				  . join("<br />\n", @$across)
				  . "</div>\n");

		$html .= "</td>\n<td>\n";

		$html .= $html_grid;			                         # grid

		$html .= '<table class="layout"><tr><td>';

		$html .= ('<div class="down1">'   . '<h3>Down</h3>'    # down 1 
				  . join("<br />\n", @down1)
				  . "</div>\n");			

		$html .= "</td>\n<td>\n";

		$html .= ('<div class="down2">'   . '<h3>Down</h3>'    # down 2
				  . join("<br />\n", @down2)
				  . "</div>\n");			

		$html .= '</td></tr></table>'."\n";
		$html .= '</td></tr></table>'."\n";

# add footer elements
		$html .= '<div class="footer"><hr /><pre>'.$footer."</pre></div>";

# now finish
		$html .= '</body></html>';
		print $html;
	}
}

sub psify_puzzle {
  my ($grid, $across, $down, $header, $footer) = @_;
  my $page_height = 11.0;
  my $page_width = 8.5;
  print <<EOPS;
%!PS-Adobe-2.0
%%PageOrder: Ascend
%%Title: Apr0807.puz
%%Creator: decode_crossword.pl (C) 2007 Beth Skwarecki, Richard M Kreuter
%%BoundingBox: 0 0 612 792
%%DocumentPaperSizes: Letter
%%EndComments
%%BeginProlog

% These are all the various global parameters to this PS program.

% Units of measure.
/cm { 72 2.54 div mul } def
/in { 72 mul } def

% Physical dimensions of the page.
/page-width $page_width in def
/page-height $page_height in def

% Logical dimensions of the puzzle grid
/grid-rows $h def
/grid-cols $w def

% Font and size to use for the header text
/header-font-name /Times-Bold def
/header-font-size 14 def

% Font and size to use for the clue subheader text.  Note: these fonts
% must be ISO-8859-1 encoded.  See the procedure RE below.  Note also:
% Unicode-aware emacsen may transcode iso-8859-1 encoded characters to
% Unicode, which will screw things up.
/clue-header-font-name /ISOTimes-Bold def
/clue-header-font-size 12 def

% Font and size to use for the clue text.
/clue-font-name /ISOTimes-Roman def
/clue-font-size 11 def

% Font and size to use for the labels in the puzzle boxes
/number-font-name /ISOTimes-Roman def
/number-font-size 6 def

% Vertical and horizontal margins around the content in the page.
/page-margin-top .50 in def
/page-margin-bottom .50 in def
/page-margin-left .50 in def
/page-margin-right .50 in def

% Spacing between columns of clues.
/column-space 12 def

% How much of the first page to devote to the puzzle grid.
/grid-share grid-rows 15 le { 1 2 div } { 2 3 div } ifelse def
/numcols grid-rows 15 le { 4 } { 3 } ifelse def

% The rest are procedure definitions and global variables,
% should not need editing

% Re-encode fonts for ISO8859-1.
/RE { % /NewFontName [NewEncodingArray] /FontName RE -
   findfont dup length dict begin
   {
       1 index /FID ne
       {def} {pop pop} ifelse
   } forall
   /Encoding exch def
   /FontName 1 index def
   currentdict definefont pop
   end
} bind def
/ISOTimes-Roman ISOLatin1Encoding /Times-Roman RE
/ISOTimes-Bold ISOLatin1Encoding /Times-Bold RE


/page-visible-horizontal page-width page-margin-left page-margin-right add sub def

/column-width page-visible-horizontal numcols 1 sub column-space mul sub numcols div def

% Total size of the puzzle grid
/grid-size page-visible-horizontal grid-share mul def

/grid-horizontal-position page-width grid-size page-margin-left add sub def
/grid-vertical-position  page-height grid-size page-margin-top add sub def

% Physical size of each puzzle grid cell
/cell-size grid-size grid-cols div def

% Make a box path.
/box {        % w h
  dup         % w h h
  0 exch      % w h 0 h
  rlineto     % w h
  exch        % h w
  dup 0       % h w w 0
  rlineto     % h w
  exch        % w h
  -1 mul      % w -h
  0 exch      % w 0 -h
  rlineto     % w
  -1 mul 0    % -w 0
  rlineto
} def

% Make a circle path.
/circle-of-radius {           % radius --
  gsave
    0 exch          % 0 radius
    0 exch          % 0 0 radius
    0 360           % 0 0 radius 0 360
    newpath
    arc
    stroke
  grestore
} def

% Make a square box path.
/square-box { % w
  dup box
} def

% Move the current path to the bottom left corner of the cell at (row,
% column). Note: row/column is really y/x inverted to match row-major
%    * ordering in the host language.
/moveto-cell { % row col
  exch 1 add exch
  cell-size mul exch
  grid-cols exch sub
  cell-size mul
  moveto
} def

/cell-rowcol-to-xy { % row col
  exch 1 add exch
  cell-size mul exch
  grid-cols exch sub
  cell-size mul
} def

% Add border to cell at (row, col).
/border-cell { % row col
  newpath
  moveto-cell  % consumes row and cols from stack
  cell-size square-box
  stroke
} def

% Fill the grid cell at (row, col).
/fill-cell { % row col
  newpath
  moveto-cell
  cell-size square-box
  fill
} def

% Label the grid cell at (row, col) with a string.
/number-cell { % string row col
  newpath
  number-font-name findfont number-font-size scalefont setfont
  moveto-cell
  0 cell-size rmoveto
  2 -1 number-font-size mul rmoveto
  show fill
} def

% Letter the grid cell at (row, col) with an answer string.
/letter-cell { % string row col
  newpath
  clue-font-name findfont clue-font-size scalefont setfont
  moveto-cell
  cell-size clue-font-size sub 2 div dup rmoveto % not well analyzed
  show fill
} def

% Circle the grid cell at (row, col).
/circle-cell { % row col
  gsave
    cell-rowcol-to-xy translate	% -
    cell-size 2 div dup dup     % cellsize/2 cellsize/2 cellsize/2
    translate                   % cellsize/2
	0.5 setgray
    circle-of-radius
  grestore
} def

%% Stuff for filling text in columns.
/first-page true def
/column-number 0 def
/column-start {
    column-number numcols mod dup
    column-width mul exch
    column-space mul add
    page-margin-left add
} def
/column-indent 0 def

/dup2 {    % a b
  dup      % a b b
  3 2 roll % b b a
  dup      % b b a a
  4 1 roll % a b b a
  exch     % a b a b
} def

/nextcolumn-maybe {
    currentpoint exch pop curr-font-size sub page-margin-bottom lt {
	/column-number column-number 1 add def
	column-number numcols eq {
	    showpage
	    /first-page false def
	} if
	column-start page-height page-margin-top sub curr-font-size sub moveto
	first-page {
            currentpoint pop % x
            column-width add grid-horizontal-position ge {
		0 grid-vertical-position column-space sub -1 mul rmoveto
	    } if
	} if
    } if
} def

/nextline {
    currentpoint pop column-start sub -1 mul curr-font-size -1 mul rmoveto
    column-indent 0 rmoveto
} def

/nextline-maybe {   % colwidth text
  dup2              % colwidth text colwidth text
  stringwidth pop   % colwidth text colwidth textwidth
  currentpoint pop  % colwidth text colwidth textwidth hpos
  add               % colwidth text colwidth hoffset
  exch              % colwidth text hoffset colwidth
  column-start add  % colwidth text hoffset colright
  gt {              % colwidth text
    nextline
  } if
  nextcolumn-maybe
} def

/showline {           % colwidth text
    nextcolumn-maybe
    dup length 0 gt { % colwidth text
	( ) search
	{                           % colwidth text2 ( ) text1
            4 -1 roll dup 5 1 roll exch  % colwidth text2 ( ) colwidth text1
	    nextline-maybe          % colwidth text2 ( ) colwidth text1
            show pop                % colwidth text2 ( )
            show                    % colwidth text2
            showline
	}
	{                           % colwidth text
	    nextline-maybe          % colwidth text
	    nextcolumn-maybe
	    show pop                %
	} ifelse
    } if
} def

/show-text-in-column-at { % colwidth text x y
  moveto
  showline
} def

% Show the lines of header.
/show-header-line { % x y text
  clue-header-font-name findfont clue-header-font-size scalefont setfont
  /curr-font-size header-font-size def
  newpath
  3 1 roll moveto
  column-width exch currentpoint show-text-in-column-at
  nextline
  nextline
  currentpoint
  fill
} def

% Show the clue header ("Across" or "Down")
/show-clue-header {
  clue-header-font-name findfont clue-header-font-size scalefont setfont
  /curr-font-size clue-header-font-size def
  newpath
  3 1 roll moveto
  column-width exch currentpoint show-text-in-column-at
  nextline
  currentpoint
  fill
} def

% Show the clue.
/show-clue {
  clue-font-name findfont clue-font-size scalefont setfont
  /curr-font-size clue-font-size def
  dup ( ) search pop
  3 1 roll pop pop
  stringwidth pop ( ) stringwidth pop add /column-indent exch def
  newpath
  3 1 roll moveto
  column-width exch currentpoint show-text-in-column-at
  /column-indent 0 def
  nextline
  currentpoint
  fill
} def
%%EndProlog

EOPS

    print <<EOPS;
gsave
  grid-horizontal-position grid-vertical-position translate
  newpath
  0 0 moveto
EOPS
  for my $y (0 .. $h-1) {
    for my $x (0 .. $w-1) {
		my $cell = $grid->[$x][$y];
		if ($cell->{'black'}) { # black square
			printf "  %d %d fill-cell\n", $y, $x;
		} else {				# white square
            if (defined($cell->{'cluenum'})) { # with number
				printf("  (%d) %d %d number-cell\n",
					   $cell->{'cluenum'}, $y, $x);
			}
            if(defined($answers) && defined($cell->{'answer'})) {
				printf("  (%s) %d %d letter-cell\n", $cell->{'answer'}, $y, $x);
			}
            if(defined($cell->{'circled'})) {
				printf("  %d %d circle-cell\n", $y, $x);
            }
		}
		printf(" %d %d border-cell\n", $y, $x); # last, to favor border color
    }
}
  print <<EOPS;
grestore

gsave
  newpath
  page-margin-left page-height page-margin-top sub
EOPS

  # FIXME: I dunno whether Ghostscript can handle utf-8 input.  Some
  # kind of string transcoding is probably required for
  for my $line (@$header) {
	  $line =~ s/(^\s+|\s+\$)//g;
	  $line =~ s/([()])/\\\1/g;
	  $line =~ s/©/\xa9/g;
	  printf "  (%s) show-header-line\n", $line;
  }
  printf "  (Across) show-clue-header\n";
  for my $clue (@$across) {
    printf "  (%s) show-clue\n", $clue;
  }
  printf "  (Down) show-clue-header\n";
  for my $clue (@$down) {
    printf "  (%s) show-clue\n", $clue;
  }

  print <<EOPS;
  pop pop
  fill
grestore
showpage
EOPS
}
