#!/usr/bin/perl 
## cssmerge - Merge CSS blocks from multiple files
## by Ben Wheeler
## $Id: cssmerge,v 1.15 2007-03-08 22:48:27 jammin Exp $
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software Foundation, Inc.
##  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA


=head1 NAME

cssmerge - Merge CSS blocks from multiple files into a single output

=cut

use strict;
use warnings;
use File::Basename qw(basename dirname);
use Getopt::Long;
use CSS::Tiny;

## Output formatting preferences, plus the building blocks used to
## recognise where a CSS identifier starts and ends when searching.
my %PREF = (
  comment_start    => '/*',
  comment_end      => '*/',
  screen_width     => 79,
  ## CSS 2.1 spec: identifiers may contain alphanumeric chars, hyphen,
  ## underscore, or any other char when it is escaped with a backslash.
  identifier_chars => 'A-Za-z0-9_\-\\\\',
);

## Comment text is narrower than the screen by the 6 columns that the
## delimiters and their surrounding spaces occupy ("/* " and " */").
$PREF{comment_width} = $PREF{screen_width} - 6;

## An identifier is bounded on the left by an unescaped non-identifier
## char or the start of the string, and on the right by an unescaped
## non-identifier char or the end of the string.
@PREF{qw(identifier_left_bound identifier_right_bound)} = (
  qr/(?:(?<!\\)[^$PREF{identifier_chars}]|^)/,
  qr/(?:(?<!\\)[^$PREF{identifier_chars}]|$)/,
);

## The program name and version are lifted out of the RCS keyword string:
## splitting on whitespace/commas puts them in fields 1 and 3.
my @id_fields = split(/[\s,]/, '$Id: cssmerge,v 1.15 2007-03-08 22:48:27 jammin Exp $');
my ($prog, $progver) = @id_fields[1, 3];

## Command-line switches and their defaults.
my ($debug, $showfiles, $exact) = (0, 1, 0);
my $regex;
my ($search_selector, $search_property);

## --files is negatable (--nofiles); --selector and --property take an
## optional string value. Any parsing error prints the usage text and dies.
GetOptions(
  'debug'      => \$debug,
  'selector:s' => \$search_selector,
  'property:s' => \$search_property,
  'exact'      => \$exact,
  'regex'      => \$regex,
  'files!'     => \$showfiles,
) or usage();

my %dirs;              ## Which dirs the files we actually used live in.
my $blocks     = {};   ## Hashref: selectors to properties to values to files to 1
my $files_used = {};   ## Hashref: files that actually had stuff pulled out of them
my $objs       = {};   ## Hashref: every successfully parsed input, keyed by name

if (@ARGV) {
  ## Parse all the input files; an unreadable file is reported and skipped.
  foreach my $file (@ARGV) {
    my $css = CSS::Tiny->read($file);
    if (!$css) {
      warn "$file: ", $CSS::Tiny::errstr, "\n";
      next;
    }
    process_css($file, $css);
  }
} else {
  ## Parse stdin. The record separator must be undef to slurp the whole
  ## stream; an empty string would select paragraph mode and stop reading
  ## at the first blank line.
  my $input = do { local $/; <STDIN> };
  $input = '' unless defined $input;   ## Empty stdin yields undef
  my $css = CSS::Tiny->read_string($input);
  if (!$css) {
    die "(stdin): ", $CSS::Tiny::errstr, "\n";
  }
  process_css('(stdin)', $css);
}


my @files = sort keys %$files_used;
unless (@files) {
  ## Distinguish "parsed something but the search found nothing" from
  ## "never managed to parse anything at all".
  if ((defined $search_selector || defined $search_property) && keys %$objs) {
    print STDERR "The search term(s) were not found in the input file(s).\n";
    exit(2);
  } else {
    print STDERR "No valid CSS blocks were read\n";
    exit(3);
  }
}

## Record the directories of the files that will be listed, so that
## output_header can shorten them to basenames when they all share one.
$dirs{dirname($_)} = 1 for @files;

## Give each used file a short 1-based number for the per-line comments.
my %fileno;
@fileno{@files} = (1 .. scalar @files);

output_header(\%fileno);
foreach my $selector (sort selector_sort keys %$blocks) {
  output_block($selector, $blocks->{$selector}, \%fileno);
}
exit(0);


## Record every selector/property/value of one parsed stylesheet that
## passes the --selector / --property filters.
##   $file - name to attribute the declarations to
##   $css  - the parsed stylesheet: a hashref of hashrefs. Top-level keys
##           are selectors, second-level keys are the properties for
##           that selector.
## Matches are stored in $blocks and the file is flagged in $files_used.
## CSS::Tiny does *not* preserve duplicate selectors or properties
## within any one file (but then maybe neither does CSS.pm)
sub process_css
{
  my ($file, $css) = @_;
  $objs->{$file} = $css;
  foreach my $selector (keys %$css) {
    next unless _selector_matches($selector);
    foreach my $property (keys %{$css->{$selector}}) {
      next unless _property_matches($property);
      my $value = $css->{$selector}->{$property};
      dprint("$selector: $property: $value: $file\n");
      $blocks->{$selector}->{$property}->{$value}->{$file} = 1;
      $files_used->{$file} = 1;
    }
  }
}

## Decide whether a selector passes the --selector filter. A match is:
## 1. No --selector search defined == everything matches
## 2. --exact, and the search term exactly matches (ie. a, but not a#b)
## 3. --regex, and the search regex matches
## 4. Normal search, search term starts with a non-identifier char
##    (eg '.class', '#id', ':pseudo') and appears in the selector,
##    unescaped and followed by an identifier boundary
## 5. Normal search, search term starts with an identifier char
##    (eg 'element') and appears in the selector as a whole identifier.
##    '-s sup' should not match 'sup-per', which is a completely
##    different and unrelated element.
sub _selector_matches
{
  my ($selector) = @_;
  return 1 if !defined $search_selector;
  return 1 if $exact && $selector eq $search_selector;
  ## The (?:...) wrapper keeps an empty search term from becoming Perl's
  ## special "empty pattern", which would reuse the last successful regex.
  return 1 if $regex && $selector =~ /(?:$search_selector)/;
  return 0 if $exact || $regex;

  if ($search_selector =~ /^[^$PREF{identifier_chars}]/) {
    return $selector =~ /(?<!\\)\Q$search_selector\E$PREF{identifier_right_bound}/ ? 1 : 0;
  }
  if ($search_selector =~ /^[$PREF{identifier_chars}]/) {
    return $selector =~ /$PREF{identifier_left_bound}\Q$search_selector\E$PREF{identifier_right_bound}/ ? 1 : 0;
  }
  return 0;
}

## Decide whether a property passes the --property filter. Cases 1-3 are
## as for selectors. A normal search matches the whole property or any
## subword of it delimited by hyphens, because that is what you normally
## want: '-p font' should match not just 'font' but also 'font-size',
## 'font-family' etc.
sub _property_matches
{
  my ($property) = @_;
  return 1 if !defined $search_property;
  return 1 if $exact && $property eq $search_property;
  ## See _selector_matches for why the term is wrapped in (?:...).
  return 1 if $regex && $property =~ /(?:$search_property)/;
  return 0 if $exact || $regex;

  return $property =~ /(?:$PREF{identifier_left_bound}|-)\Q$search_property\E(?:$PREF{identifier_right_bound}|-)/ ? 1 : 0;
}

## Print the banner comment at the top of the generated stylesheet:
## generator name/version, the input file list (unless --nofiles), and
## any search terms that were used.
##   $files_short - hashref mapping each used file name to its short label
sub output_header
{
  my ($files_short) = @_;

  ## Build the display names and their labels in the same (sorted) order.
  ## Use basenames if all in one directory.
  my $use_basenames = (scalar(keys %dirs) == 1);
  my (@names, @labels);
  foreach my $path (sort keys %$files_short) {
    push @labels, $files_short->{$path};
    push @names, ($use_basenames ? basename($path) : $path);
  }

  my $rule = '*' x $PREF{comment_width};
  print format_comment($rule);
  print format_comment("This file was generated by $prog $progver");

  if ($showfiles) {
    if (@names == 1) {
      print format_comment("Input file: $names[0]");
    } else {
      print format_comment("Input files:");

      ## Right-align the labels to the width of the longest one
      my $label_width = 0;
      foreach my $label (@labels) {
        my $len = length($label);
        $label_width = $len if ($len > $label_width);
      }

      foreach my $i (0 .. $#names) {
        if ($labels[$i] ne $names[$i]) {
          my $padded = sprintf("%${label_width}s", $labels[$i]);
          print format_comment("   $padded = $names[$i]");
        } else {
          print format_comment("   $names[$i]");
        }
      }
    }
  }

  print format_comment("Searched for selector: $search_selector") if ($search_selector);
  print format_comment("Searched for property: $search_property") if ($search_property);
  print format_comment($rule);
}


## Print one merged CSS block.
##   $selector    - the selector the block belongs to
##   $block       - hashref: properties to values to files to 1
##   $files_short - hashref mapping each used file name to its number
## Each declaration is followed by a comment listing the numbers of the
## files it came from, unless --nofiles was given or only one file was
## used. A warning is issued for properties with more than one value.
sub output_block
{
  my ($selector, $block, $files_short) = @_;

  my $file_count = scalar keys %$files_short;

  ## Don't bother showing files per line if there was only one.
  ## (Decided locally, without altering the global option.)
  my $annotate = ($showfiles && $file_count != 1);

  ## The length of a comment if it references all the files:
  ## all the numbers, plus the spaces between them.
  my $comment_maxlen = $file_count - 1;
  $comment_maxlen += length($_) for values %$files_short;

  ## Where to position our comments
  my $comment_tab = $PREF{screen_width} - $comment_maxlen - length($PREF{comment_start}) - length($PREF{comment_end}) - 2; ## 2 spaces

  print "\n$selector {\n";
  foreach my $prop (sort keys %$block) {
    ## Sorted so conflicting values come out in the same order every run
    my @val_lines = sort keys %{$block->{$prop}};
    warn "Duplicate property '$prop' in block '$selector'\n" if (@val_lines > 1);
    foreach my $value (@val_lines) {
      my $line = "$prop: $value;";
      print "  $line";
      if (!$annotate) {
        print "\n";
        next;
      }

      ## One column per file: its number if this line was found there,
      ## otherwise blanks of the same width so the columns line up.
      my %infile;
      $infile{$files_short->{$_}} = 1 for keys %{$block->{$prop}->{$value}};
      my $filelist = join(' ', map { $infile{$_} ? $_ : (' ' x length($_)) } (1 .. $file_count));

      my $pad = $comment_tab - length($line) - 2;
      ## If the line goes past the tab, put the comment on the next line
      if ($pad < 2) {
        print "\n";
        $pad = $comment_tab;
      }
      ## With very many files the tab itself can be negative
      $pad = 0 if ($pad < 0);
      print ' ' x $pad, format_comment($filelist, $comment_maxlen);
    }
  }
  print "}\n";
}


## Format a string as one or more CSS comment lines of a given width.
##   $string - the comment text
##   $width  - width of the text area (default: $PREF{comment_width})
## Returns the comment line(s) as a single string, each line padded to
## the width, wrapped in the comment delimiters and newline-terminated.
## Text longer than the width is wrapped at spaces. A single word that is
## longer than the width is never cut; its line simply overflows.
sub format_comment
{
  my $string = shift;
  my $width = shift || $PREF{comment_width};

  my @lines;
  while (length($string) > $width) {
    ## Rudimentary wrapping: break at the last space that fits
    my $split_pos = rindex($string, ' ', $width);
    ## No usable space within the width (an over-long word, eg. a path):
    ## break at the first space after it instead
    $split_pos = index($string, ' ', $width) if ($split_pos <= 0);
    ## No space left at all: emit the rest as-is rather than loop forever
    last if ($split_pos <= 0);
    push @lines, substr($string, 0, $split_pos);
    $string = substr($string, $split_pos + 1);
  }
  push @lines, $string;

  my $comment = '';
  foreach my $text (@lines) {
    my $rpad = $width - length($text);
    $rpad = 0 if ($rpad < 0);   ## Overflowing lines get no padding
    my $padding = ' ' x $rpad;
    $comment .= "$PREF{comment_start} $text$padding $PREF{comment_end}\n";
  }
  return $comment;
}

=head1 SYNOPSIS

cssmerge [--debug] [--exact] [--regex] [--nofiles] [--selector selname] [--property propname] [inputfile ...]

If no inputfiles are specified, stdin is read.

The merged CSS is always printed to stdout.

=cut

## Print a summary of the command-line syntax to stderr and exit with a
## failure status (via die). Called when option parsing fails.
sub usage
{
  die <<EOT;
Usage:
  $prog [options] [--selector name] [--property name] [file ...]
  If --selector and/or --property are specified, only blocks/properties
  which match will be extracted and merged; otherwise, everything will be.
Options:
  --debug   Print lots of useless information
  --exact   Only show exact matches for the specified name(s), not names
            which contain them. You don't want this; see the documentation.
  --regex   Treat the specified name(s) as Perl regular expressions.
  --nofiles Don't create comments in the output CSS showing which
            input file(s) each line was found in.
Documentation:
  For more information about this program, type 'perldoc $prog'.
EOT
}

## Comparison routine for sort(): orders selectors as plain strings,
## except that the link pseudo-classes are put in their one "right"
## order (:link, :visited, :hover, :active).
## "a" will automatically go before "a:foo" anyway, so all we have to do
## is make sure the 'foo' bits are in the right order.
## Compares the package variables $a and $b, as sort() requires.
sub selector_sort
{
  my $link_re = qr/^(.+?):(link|visited|hover|active)(:.+)?$/;
  my @link_order = ('link', 'visited', 'hover', 'active');
  ## Ranks must stay single digits for a string comparison to order them
  die "Implementation needs changing" if (@link_order > 9);
  my %link_map;
  @link_map{@link_order} = (0 .. $#link_order);

  ## Replace the link|visited|whatever part of each selector with its
  ## rank, then compare as normal. Each side is converted on its own
  ## merits, whatever the other side looks like; converting only when
  ## both sides are link selectors gives an inconsistent ordering
  ## (a:active < a:focus < a:link < a:active).
  my ($lhs, $rhs) = map {
    my $key = $_;
    if ($key =~ $link_re) {
      $key = "$1:$link_map{$2}" . (defined $3 ? $3 : '');
    }
    $key;
  } ($a, $b);

  ## Fall back to the raw selectors so distinct selectors never tie
  return(($lhs cmp $rhs) || ($a cmp $b));
}
    
## Debugging output: write the arguments to stderr, but only when
## --debug was given.
sub dprint
{
  $debug and print STDERR @_;
}

=head1 DESCRIPTION

B<cssmerge> merges blocks from one or more Cascading Style 
Sheets (CSS) into a single, sorted output. This may be helpful if you
are a poor web developer who has been handed a mess of conflicting 
stylesheets to work with.

=head1 INPUT

A CSS block (also called a rule set) is a set of declarations 
surrounded by curly braces and preceded by one or more selectors, like this:

    h1, h2.foo, h3#bar {
      font-size: large;
    }

For each selector, cssmerge collects all the rules that are defined
in all the input CSS files (or stdin), and merges them into one output 
block. In the above example, there are 3 selectors, so you would get 3
output blocks each containing I<font-size: large;> plus whatever other
rules are defined in other files for these selectors. No attempt is 
made to recombine blocks whose rules match into a single multi-selector
block like the above, partly because doing so may make it harder to 
find what you are looking for in the file.

Blocks are generated for every selector in the input files, unless you
use the B<--selector> or B<--property> options. 

cssmerge will warn of declarations which appear to conflict with 
one another in a particular block, such as two different 
I<font-family> declarations. It's up to you what you do with this
information!

=head1 OUTPUT

The merged output blocks are written to stdout, so redirect this to a file
if you require. Anything that isn't CSS (warnings, errors, and debugging
output) goes to stderr.

Prior to output, the blocks are sorted by selector. 

TODO: Currently the output order is ASCII, so ID selectors which begin 
with # come first, followed by class selectors beginning with . and finally 
selectors for specific elements. This is as good an order as any for
finding things, which was the primary initial purpose of cssmerge, but
it's backwards in terms of actually using the output as a stylesheet.
Styles should go from most general to most specific; this means whole
element selectors first, classes in the middle, IDs at the end. But
where should element.class or element.ID go? 

A special exception to the sort order is made for 
link pseudo-classes (:link, :visited, :hover, :active). There is
a definite "right" order for these, so we use it.

Within each block, the properties are also presented in sorted order,
for ease of finding what you're looking for.
Properties which conflict with each other will thus be found adjacent 
to each other, eg:

    h2 {
      abc: def;
      font-size: small;    /* input_one.css */
      font-size: large;    /* input_two.css */
      ghi: jkl;
    }

=head1 OPTIONS

=over 4

=item B<--debug>

You don't need this, there are no bugs :-)

=item B<--exact>

Only show exact matches for the strings specified by B<--selector> and/or 
B<--property>. You probably don't want to do this. Let me explain.

Without this option, cssmerge will show blocks whose 
selectors or properties I<contain> your requested string(s), provided
they are delimited by word boundaries. This is best explained by example:

B<--selector> I<a> does B<not> simply match anything containing the 
letter 'a' ('span', #foobar, etc) as that would not be very useful. 
Instead it will match I<a>, I<a:hover>, I<a.extlink>, 
I<a#mylink>, I<.foo a>, I<#bar a>, I<p a>, I<preE<gt>a>, I<a+p> and 
I<a img>. 
All but the last two of these examples may have an effect on any 
given E<lt>aE<gt> tag's presentation, so you really want to see them all 
in most circumstances. The last two do not relate to the E<lt>aE<gt> tag 
itself but something near or within it. But it can often be useful to see 
these anyway. 

B<--exact> B<--selector> I<a> will return at most one 
block whose selector is exactly I<a>, ignoring all the other
permutations listed above.

B<--property> I<font> will match I<font>, I<font-size>, I<font-family> etc
but not I<fontastic> or I<bouffont>. 

B<--exact> B<--property> I<font> would only return the I<font> property, 
missing out on all the other font-related properties above.

=item B<--nofiles>

If more than one CSS input file was merged to create the output, 
cssmerge normally adds comments in the output denoting which input file(s)
each line was found in. Use B<--nofiles> to prevent this.

=item B<--property> I<propname>

Only write blocks and property names which contain I<propname>
(see also --exact).

=item B<--selector> I<selname>

Only write blocks whose selectors contain I<selname> (see also --exact)

=back 

=head1 AUTHOR

Ben Wheeler <ben@qolc.net>

=head1 COPYRIGHT

Copyright (C) 2005 Ben Wheeler / Quality Of Life Communications. 
This program is free software; you may copy, distribute and modify
it under the GNU GPL.

=cut