#!/opt/local/bin/python2.5

import optparse
import cssutils
import cssutils.css
import logging
from lxml import etree
import os.path
import numpy

# Raise the cssutils logger threshold to CRITICAL. Presumably this keeps the
# parser's complaints about malformed CSS out of the report output -- confirm
# before lowering it.
cssutils.log.setLevel(logging.CRITICAL)

def getOptions():
	"""Parse the command line and return optparse's (options, args) pair.

	options.list carries the value given to -l/--file-list (an empty string
	when the option is absent); args holds the remaining positional
	arguments.
	"""
	usageText = "\n\t%prog [options] [file [file ...]]\n\t"
	cli = optparse.OptionParser(usage=usageText, version="%prog 0.0.1")

	cli.add_option(
		"-l", "--file-list",
		dest="list",
		default="",
		help="file with newline-separated list of files to grab CSS stats from"
	)

	return cli.parse_args()

def _resolveSheetPath(fileBase, url):
	"""Return the local path where the style sheet at `url` is looked up.

	Absolute http:// and https:// URLs map into the ./out/ directory by
	dropping the scheme. Every other URL is resolved against fileBase, the
	directory of the HTML file that referenced it.
	"""
	for scheme in ("http://", "https://"):
		if url.startswith(scheme):
			# path is ./out/whatever....
			return "./out/" + url[len(scheme):]

	if url.startswith("/"):
		# we count on the URL being at the root level
		url = "." + url

	# An empty fileBase (HTML file given without a directory part) is skipped
	# by os.path.join, so the sheet resolves next to the HTML file rather
	# than against the filesystem root.
	return os.path.join("./", fileBase, url)


def getFileStats(fileName=None):
	"""Print and return CSS statistics for one HTML file.

	Counts the rules and the serialized lines of every <style> element in
	the file, and of every style sheet it pulls in through @import rules or
	<link rel='stylesheet'> elements. A per-file report is printed to
	standard output.

	Args:
		fileName: path of the HTML file to examine.

	Returns:
		A (totalRules, totalLines, totalSheets) tuple, or None when
		fileName is None or empty.

	Raises:
		Nothing is caught here: errors from open(), lxml or cssutils (for
		instance a linked sheet that is missing on disk) propagate to the
		caller.
	"""
	# Test truthiness only: calling len() first would raise TypeError for the
	# declared default of None.
	if not fileName:
		return

	# The single-argument print(...) form behaves identically under the
	# Python 2 print statement this script is written for.
	print("")
	print("file: %s" % fileName)
	print(40 * "-")

	htmlFile = open(fileName)
	try:
		tree = etree.HTML(htmlFile.read())
	finally:
		htmlFile.close()

	totalRules = 0
	totalLines = 0
	linkedRules = []

	sheets = tree.findall(".//style")
	print("\t# of internal style sheets: %s" % len(sheets))

	for s in sheets:
		# An empty <style> element has text None; parse it as an empty sheet
		# instead of the literal text "None". The text is passed through
		# unconverted so non-ASCII CSS does not fail in str().
		sheet = cssutils.parseString(s.text or "")
		totalRules += len(sheet.cssRules)
		totalLines += len(sheet.cssText.split("\n"))

		# find the @import rules
		for r in sheet.cssRules:
			if isinstance(r, cssutils.css.CSSImportRule) and r.href:
				linkedRules.append(r.href)

	externalSheets = tree.findall(".//link[@rel='stylesheet']")
	# A <link> without an href names no file to load, so it is not followed.
	linkedRules.extend([
		link.get("href") for link in externalSheets if link.get("href")
	])
	print("\t# of external style sheets: %s" % len(externalSheets))

	fileBase = os.path.dirname(fileName)
	for url in linkedRules:
		sheet = cssutils.parseFile(_resolveSheetPath(fileBase, url))
		totalRules += len(sheet.cssRules)
		totalLines += len(sheet.cssText.split("\n"))

	totalSheets = len(sheets) + len(linkedRules)
	print("\t# of CSS rules on the page: %s" % totalRules)
	print("\tlines of CSS %s" % totalLines)
	print("\ttotal # of style sheets: %s" % totalSheets)

	return (totalRules, totalLines, totalSheets)



def _readFileList(listPath):
	"""Return the non-blank lines of the file at listPath, whitespace-stripped."""
	listFile = open(listPath)
	try:
		# splitlines() copes with both "\n" and "\r\n" line endings
		rawLines = listFile.read().splitlines()
	finally:
		listFile.close()

	names = [line.strip() for line in rawLines]
	return [name for name in names if name]


def main():
	"""Collect CSS statistics for every requested file and print the totals.

	The files to examine come from the -l/--file-list file (one name per
	line) followed by the positional command-line arguments; empty names
	are dropped. Each file is reported by getFileStats() and a summary over
	all files is printed afterwards.
	"""
	# parse out our options
	(options, args) = getOptions()

	files = []
	if options.list:
		files.extend(_readFileList(options.list))
	files.extend([name for name in args if name])

	totals = [getFileStats(name) for name in files]

	totalRules = [stats[0] for stats in totals]
	totalLines = [stats[1] for stats in totals]
	totalSheets = [stats[2] for stats in totals]

	# The single-argument print(...) form behaves identically under the
	# Python 2 print statement this script is written for.
	print("")
	print("Totals:")
	print(40 * "-")
	print("\tfiles examined: %s" % len(files))

	if not files:
		# The mean and median of an empty list are NaN, which cannot be
		# reported as an integer, so there is nothing further to print.
		return

	print("\ttotal # of CSS rules: %s" % numpy.sum(totalRules))
	print("\tnormalized lines of CSS: %s" % numpy.sum(totalLines))
	print("\taverage # of CSS lines per file: %d (mean) %d (median)" % (
		int(numpy.mean(totalLines)), int(numpy.median(totalLines))))
	print("\taverage # of CSS rules per file: %d (mean) %d (median)" % (
		int(numpy.mean(totalRules)), int(numpy.median(totalRules))))
	print("\taverage # of CSS style sheets per file: %d (mean) %d (median)" % (
		int(numpy.mean(totalSheets)), int(numpy.median(totalSheets))))

# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
	main()

