[Rivet-svn] r3085 - in trunk: . bin pyext

blackhole at projects.hepforge.org blackhole at projects.hepforge.org
Sun May 8 18:32:55 BST 2011


Author: buckley
Date: Sun May  8 18:32:55 2011
New Revision: 3085

Log:
Extending flat2aida to be able to read from stdin and write to stdout, as for
aida2flat, and also eliminating its internal histo parsing representation in
favour of the one in lighthisto. lighthisto's fromFlat also needed a bit of an
overhaul: fromFlatHisto now parses a single histo's chunk of text (including
the BEGIN and END lines), and fromFlat now parses a collection of histos from
a file, in keeping with the behaviour of fromDPS/fromAIDA. Merging these
changes into Professor is now needed.

Modified:
   trunk/ChangeLog
   trunk/bin/aida2flat
   trunk/bin/flat2aida
   trunk/pyext/lighthisto.py

Modified: trunk/ChangeLog
==============================================================================
--- trunk/ChangeLog	Sun May  8 14:16:28 2011	(r3084)
+++ trunk/ChangeLog	Sun May  8 18:32:55 2011	(r3085)
@@ -1,5 +1,15 @@
 2011-05-08  Andy Buckley  <andy at insectnation.org>
 
+	* Extending flat2aida to be able to read from and write to
+	stdin/out as for aida2flat, and also eliminating the internal
+	histo parsing representation in favour of the one in
+	lighthisto. lighthisto's fromFlat also needed a bit of an
+	overhaul: it has been extended to parse each histo's chunk of
+	text (including BEGIN and END lines) in fromFlatHisto, and for
+	fromFlat to parse a collection of histos from a file, in keeping
+	with the behaviour of fromDPS/fromAIDA. Merging into Professor is
+	now needed.
+
 	* Extending aida2flat to have a better usage message, to accept
 	input from stdin for command chaining via pipes, and to be a bit
 	more sensibly internally structured (although it also now has to

Modified: trunk/bin/aida2flat
==============================================================================
--- trunk/bin/aida2flat	Sun May  8 14:16:28 2011	(r3084)
+++ trunk/bin/aida2flat	Sun May  8 18:32:55 2011	(r3085)
@@ -89,10 +89,13 @@
 
 
     ## Initialise steering variables which need a bit more care
+    import re
     if opts.PATHPATTERNS is None:
         opts.PATHPATTERNS = []
+    opts.PATHPATTERNS = [re.compile(r) for r in opts.PATHPATTERNS]
     if opts.PATHUNPATTERNS is None:
         opts.PATHUNPATTERNS = []
+    opts.PATHUNPATTERNS = [re.compile(r) for r in opts.PATHUNPATTERNS]
     if opts.GNUPLOT:
         opts.SPLITOUTPUT = True
 
@@ -117,34 +120,32 @@
     ## Run over the files and build histo objects selected by the pattern filtering
     histos = {}
     for aidafile in args:
-        if aidafile != "-":
-            if not os.access(aidafile, os.R_OK):
-                logging.error("%s can not be read" % aidafile)
-                sys.exit(1)
         try:
             if aidafile == "-":
                 tree = ET.parse(sys.stdin)
             else:
+                if not os.access(aidafile, os.R_OK):
+                    logging.error("%s can not be read" % aidafile)
+                    sys.exit(1)
                 tree = ET.parse(aidafile)
         except:
             logging.error("%s can not be parsed as XML" % aidafile)
             sys.exit(1)
         for dps in tree.findall("dataPointSet"):
-            useThisDps = True
+            useThis = True
             dpspath = os.path.join(dps.get("path"), dps.get("name"))
-            import re
             if opts.PATHPATTERNS:
-                useThisDps = False
+                useThis = False
                 for regex in opts.PATHPATTERNS:
-                    if re.compile(regex).search(dpspath):
-                        useThisDps = True
+                    if regex.search(dpspath):
+                        useThis = True
                         break
-            if useThisDps and opts.PATHUNPATTERNS:
+            if useThis and opts.PATHUNPATTERNS:
                 for regex in opts.PATHUNPATTERNS:
-                    if re.compile(regex).search(dpspath):
-                        useThisDps = False
+                    if regex.search(dpspath):
+                        useThis = False
                         break
-            if useThisDps:
+            if useThis:
                 hist = lighthisto.Histo.fromDPS(dps)
                 try:
                     plotparser.updateHistoHeaders(hist)
@@ -157,13 +158,10 @@
     if histos:
         ## Split output per-histogram
         if opts.SPLITOUTPUT:
-            paper = os.path.basename(aidafile).replace(".aida", "")
             for f, hs in sorted(histos.iteritems()):
                 for h in sorted(hs):
                     histo = h.fullPath()[1:].replace("/", "_")
                     outfile = "%s.dat" % histo
-                    if opts.SMARTOUTPUT:
-                        outfile = "%s-%s" % (paper, outfile)
                     #print "Writing to", outfile
                     out = open(outfile, "w")
                     if not opts.GNUPLOT:

Modified: trunk/bin/flat2aida
==============================================================================
--- trunk/bin/flat2aida	Sun May  8 14:16:28 2011	(r3084)
+++ trunk/bin/flat2aida	Sun May  8 18:32:55 2011	(r3085)
@@ -1,7 +1,15 @@
 #! /usr/bin/env python
 
 """\
-%prog flatfile [flatfile2 ...]
+%prog [options] flatfile [flatfile2 ...]
+
+Convert make-plots data files to AIDA XML format. The output is by default
+written out to a file with the same name as the input (out.aida in the case of
+stdin) unless the --output option is specified. When specifying either input or
+output filenames, a '-' is used to refer to stdin or stdout as appropriate.
+
+Histograms can also be filtered by histo path, using the -m or -M options for a
+positive or negative regex pattern patch respectively.
 """
 
 import sys
@@ -10,145 +18,162 @@
     sys.exit(1)
 
 
-import os
-from htmlentitydefs import codepoint2name
-unichr2entity = dict((unichr(code), u'&%s;' % name) \
-                         for code,name in codepoint2name.iteritems() \
-                         if code != 38) # exclude "&"
-def htmlescape(text, d=unichr2entity):
-    if u"&" in text:
-        text = text.replace(u"&", u"&")
-    for key, value in d.iteritems():
-        if key in text:
-            text = text.replace(key, value)
-    return text
-
+import os, logging
+import lighthisto
 
 
-class Inputdata:
-    def __init__(self, filename):
-        self.histos = {}
-        self.description = {}
-        self.description['DrawOnly'] = []
-        f = open(filename+'.dat', 'r')
-        for line in f:
-            if (line.count('#',0,1)):
-                if (line.count('BEGIN HISTOGRAM')):
-                    title = line.split('BEGIN HISTOGRAM', 1)[1].strip()
-                    self.description['DrawOnly'].append(title)
-                    self.histos[title] = Histogram(f)
-                    if title:
-                        self.histos[title].path = title
-        f.close()
-
-class Histogram:
-    def __init__(self, f):
-        self.read_input(f)
-        self.path = None
-
-    def read_input(self, f):
-        self.description = {}
-        self.data = []
-        for line in f:
-            if (line.count('#',0,1)):
-                if (line.count('END HISTOGRAM')):
-                    break
-            else:
-                line = line.rstrip()
-                if (line.count('=')):
-                    linearray = line.split('=', 1)
-                    key = linearray[0].strip()
-                    val = linearray[1].strip()
-                    #print "@", key, val
-                    self.description[key] = val
-                else:
-                    linearray = line.split()
-                    if len(linearray)==4:
-                        self.data.append({'LowEdge': float(linearray[0]),
-                                          'UpEdge':  float(linearray[1]),
-                                          'Content': float(linearray[2]),
-                                          'Error':   [float(linearray[3]),float(linearray[3])]})
-                    elif len(linearray)==5:
-                        self.data.append({'LowEdge': float(linearray[0]),
-                                          'UpEdge':  float(linearray[1]),
-                                          'Content': float(linearray[2]),
-                                          'Error':   [float(linearray[3]),float(linearray[4])]})
-                    else:
-                        raise Exception("Unexpected line format: '%s'" % linearray)
-
-    def write_datapoint(self, f, xval, xerr, yval, yerr):
-        f.write('    <dataPoint>\n')
-        f.write('      <measurement errorPlus="%e" value="%e" errorMinus="%e"/>\n' %(xerr, xval, xerr))
-        f.write('      <measurement errorPlus="%e" value="%e" errorMinus="%e"/>\n' %(yerr[1], yval, yerr[0]))
-        f.write('    </dataPoint>\n')
-
-    def write_datapointset_header(self, f, count, bin):
-        path = '/REF/%s/d%02d-x01-y%02d' % (filename.split('/')[-1], count, bin+1)
-        if self.description.has_key("AidaPath"):
-            path = self.description["AidaPath"]
-        if not self.path:
-            self.path = path
-        f.write('  <dataPointSet name="%s" dimension="2"\n' % (os.path.basename(self.path)))
-        if not self.description.has_key('Title'):
-            self.description['Title'] = ""
-        f.write('    path="%s" title="%s">\n' % (os.path.dirname(self.path), htmlescape(self.description['Title'])))
-        if self.description.has_key("XLabel") and self.description["XLabel"] is not None:
-            f.write('    <dimension dim="0" title="%s" />\n' % htmlescape(self.description['XLabel']))
-        if self.description.has_key("YLabel") and self.description["YLabel"] is not None:
-            f.write('    <dimension dim="1" title="%s" />\n' % htmlescape(self.description['YLabel']))
-        f.write('    <annotation>\n')
-        f.write('      <item key="Title" value="%s" sticky="true"/>\n' %(htmlescape(self.description['Title'])))
-        f.write('      <item key="AidaPath" value="%s" sticky="true"/>\n' %(self.path))
-        f.write('      <item key="FullPath" value="/%s.aida%s" sticky="true"/>\n' %(filename.split('/')[-1], self.path))
-        f.write('    </annotation>\n')
-
-    def write_datapointset_footer(self, f):
-        f.write('  </dataPointSet>\n')
-
-    def write_datapointset(self, f, count):
-        if not opts.SPLITHISTOS:
-            self.write_datapointset_header(f, count, 0)
-        for bin, bindata in enumerate(self.data):
-            xval = 0.5*(bindata['UpEdge'] + bindata['LowEdge'])
-            if bindata['UpEdge'] == bindata['LowEdge']:
-                xerr = 0.5
-            else:
-                xerr = 0.5*(bindata['UpEdge'] - bindata['LowEdge'])
-            yval = bindata['Content']
-            yerr = bindata['Error']
-            if opts.SPLITHISTOS:
-                self.write_datapointset_header(f, count, bin)
-            self.write_datapoint(f, xval, xerr, yval, yerr)
-            if opts.SPLITHISTOS:
-                self.write_datapointset_footer(f)
-        if not opts.SPLITHISTOS:
-            self.write_datapointset_footer(f)
+##########################################################
 
 
 if __name__ == "__main__":
-    from optparse import OptionParser
+
+    ## Default plot file search paths
+    default_plotdirs = ["."]
+    try:
+        import rivet
+        default_plotdirs += rivet.getAnalysisPlotPaths()
+    except:
+        pass
+
+
+    ## Parse command line options
+    from optparse import OptionParser, OptionGroup
     parser = OptionParser(usage=__doc__)
+    parser.add_option("-o", "--output", default=None,
+                      help="Write all histos to a single output file. "
+                      "stdout can be explicitly specified by setting '-' as the output filename. This option will "
+                      "be disregarded if --split is specified.",
+                      dest="OUTPUT")
     parser.add_option("-s", "--split", action="store_true", default=False,
-                      help="Split histograms into individual files", dest="SPLITHISTOS")
+                      help="Split histograms into individual files", dest="SPLITOUTPUT")
+    parser.add_option("--plotinfodir", dest="PLOTINFODIR", action="append",
+                      default=default_plotdirs, help="directory which may contain plot header information")
+    parser.add_option("-m", "--match", action="append",
+                      help="Only write out histograms whose $path/$name string matches these regexes",
+                      dest="PATHPATTERNS")
+    parser.add_option("-M", "--unmatch", action="append",
+                      help="Exclude histograms whose $path/$name string matches these regexes",
+                      dest="PATHUNPATTERNS")
+    verbgroup = OptionGroup(parser, "Verbosity control")
+    verbgroup.add_option("-v", "--verbose", action="store_const", const=logging.DEBUG, dest="LOGLEVEL",
+                         default=logging.INFO, help="print debug (very verbose) messages")
+    verbgroup.add_option("-q", "--quiet", action="store_const", const=logging.WARNING, dest="LOGLEVEL",
+                         default=logging.INFO, help="be very quiet")
     opts, args = parser.parse_args()
 
+
+    ## Configure logging
+    logging.basicConfig(level=opts.LOGLEVEL, format="%(message)s")
+
+
+    ## Initialise steering variables which need a bit more care
+    import re
+    if opts.PATHPATTERNS is None:
+        opts.PATHPATTERNS = []
+    opts.PATHPATTERNS = [re.compile(r) for r in opts.PATHPATTERNS]
+    if opts.PATHUNPATTERNS is None:
+        opts.PATHUNPATTERNS = []
+    opts.PATHUNPATTERNS = [re.compile(r) for r in opts.PATHUNPATTERNS]
+
+
+    ## Check that at least one file has been supplied
     if len(args) < 1:
-        sys.stderr.write("Must specity at least one histogram file\n")
+        sys.stderr.write("Must specity at least one histogram file (or stdin)\n")
         sys.exit(1)
 
-    for flatfile in args:
-        filename = flatfile.replace(".dat", "")
 
-        inputdata = Inputdata(filename)
-
-        f = open(filename+'.aida', 'w')
-        f.write('<?xml version="1.0" encoding="ISO-8859-1" ?>\n')
-        f.write('<!DOCTYPE aida SYSTEM "http://aida.freehep.org/schemas/3.3/aida.dtd">\n')
-        f.write('<aida version="3.3">\n')
-        f.write('  <implementation version="1.1" package="FreeHEP"/>\n')
+    ## Add directories to the plotinfo path
+    for flatfile in args:
+        if flatfile != "-":
+            flatdir = os.path.dirname(flatfile)
+            if flatdir not in opts.PLOTINFODIR:
+                opts.PLOTINFODIR.append(flatdir)
+    ## Remove empty path entries
+    opts.PLOTINFODIR = filter(lambda s: len(s) > 0, opts.PLOTINFODIR)
+    ## Create plot file parser
+    plotparser = lighthisto.PlotParser(opts.PLOTINFODIR)
 
-        for i, d in enumerate(inputdata.description['DrawOnly']):
-            inputdata.histos[d].write_datapointset(f, i+1)
 
-        f.write('</aida>\n')
-        f.close
+    ## Run over the files and build histo objects selected by the pattern filtering
+    histos = {}
+    for flatfile in args:
+        if flatfile != "-" and not os.access(flatfile, os.R_OK):
+            logging.error("%s can not be read" % flatfile)
+            sys.exit(1)
+        try:
+            allhistos = lighthisto.Histo.fromFlat(flatfile)
+        except Exception, e:
+            logging.error("%s can not be parsed" % flatfile)
+            print e
+            sys.exit(1)
+
+        for histpath, hist in allhistos.iteritems():
+            useThis = True
+            if opts.PATHPATTERNS:
+                useThis = False
+                for regex in opts.PATHPATTERNS:
+                    if regex.search(histpath):
+                        useThis = True
+                        break
+            if useThis and opts.PATHUNPATTERNS:
+                for regex in opts.PATHUNPATTERNS:
+                    if regex.search(histpath):
+                        useThis = False
+                        break
+            if useThis:
+                try:
+                    plotparser.updateHistoHeaders(hist)
+                except ValueError, err:
+                    logging.debug(err)
+                histos.setdefault(flatfile, []).append(hist)
+
+
+    ## Write output
+    if histos:
+        ## Split output per-histogram
+        if opts.SPLITOUTPUT:
+            for f, hs in sorted(histos.iteritems()):
+                for h in sorted(hs):
+                    histo = h.fullPath()[1:].replace("/", "_")
+                    outfile = "%s.aida" % histo
+                    #print "Writing to", outfile
+                    out = open(outfile, "w")
+                    out.write('<?xml version="1.0" ?>\n')
+                    out.write('<!DOCTYPE aida SYSTEM "http://aida.freehep.org/schemas/3.3/aida.dtd">\n')
+                    out.write('<aida version="3.3">\n')
+                    out.write('  <implementation version="1.1" package="Rivet"/>\n')
+                    out.write(h.asAIDA())
+                    out.write('</aida>\n')
+                    out.close()
+        ## Write all output to a single file (stdout by default)
+        elif opts.OUTPUT:
+            outfile = opts.OUTPUT
+            if outfile == "-":
+                out = sys.stdout
+            else:
+                out = open(outfile, "w")
+            out.write('<?xml version="1.0" ?>\n')
+            out.write('<!DOCTYPE aida SYSTEM "http://aida.freehep.org/schemas/3.3/aida.dtd">\n')
+            out.write('<aida version="3.3">\n')
+            out.write('  <implementation version="1.1" package="Rivet"/>\n')
+            for f, hs in sorted(histos.iteritems()):
+                for h in sorted(hs):
+                    out.write(h.asAIDA())
+            out.write('</aida>\n')
+            if outfile != "-":
+                out.close()
+        ## Split output per-infile
+        else:
+            for f, hs in sorted(histos.iteritems()):
+                outfile = os.path.basename(f).replace(".dat", ".aida")
+                if f == "-":
+                    outfile = "out.dat"
+                out = open(outfile, "w")
+                out.write('<?xml version="1.0" ?>\n')
+                out.write('<!DOCTYPE aida SYSTEM "http://aida.freehep.org/schemas/3.3/aida.dtd">\n')
+                out.write('<aida version="3.3">\n')
+                out.write('  <implementation version="1.1" package="Rivet"/>\n')
+                for h in sorted(hs):
+                    out.write(h.asAIDA())
+                out.write('</aida>\n')
+                out.close()

Modified: trunk/pyext/lighthisto.py
==============================================================================
--- trunk/pyext/lighthisto.py	Sun May  8 14:16:28 2011	(r3084)
+++ trunk/pyext/lighthisto.py	Sun May  8 18:32:55 2011	(r3085)
@@ -1,8 +1,7 @@
 # Use posixpath instead of os.path for AIDA path handling to be platform
 # independent, i.e. always use "/" as path delimiter.
 import posixpath
-import os
-import re
+import os, sys, re
 
 
 from htmlentitydefs import codepoint2name
@@ -248,7 +247,7 @@
                         br[0] <= curran[1] <= br[1])):
                 new.addBin(b)
             else:
-                logging.debug("Chopping bin %s: %e" % (self.fullPath(), b.getBinCenter()))
+                sys.stderr.write("Chopping bin %s: %e\n" % (self.fullPath(), b.getBinCenter()))
         return new
 
     def renormalise(self, newarea):
@@ -321,15 +320,25 @@
 
 
     @classmethod
-    def fromFlat(cls, stringbuf):
-        """Build a histogram from a string buffer containing flat-format."""
+    def fromFlatHisto(cls, stringbuf):
+        """Build a histogram from its flat text representation.
+        """
         desc = {}
         new = cls()
-        for line in stringbuf:
-            line = line.rstrip()
-            if "=" in line:
-                linearray = line.split("=", 0)
+        for line in stringbuf.splitlines():
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+            if 'BEGIN HISTOGRAM' in line:
+                fullpath = line.split('BEGIN HISTOGRAM', 1)[1].strip()
+                new.path = os.path.dirname(fullpath)
+                new.name = os.path.basename(fullpath)
+                continue
+            elif "=" in line:
+                linearray = line.split("=", 1)
                 desc[linearray[0]] = linearray[1]
+            elif 'END HISTOGRAM' in line:
+                break
             else:
                 linearray = line.split()
                 if len(linearray) == 4:
@@ -341,8 +350,10 @@
                                    float(linearray[2]),
                                    float(linearray[3]), float(linearray[4])))
                 else:
-                    logging.error("Unknown line format in '%s'" % (line))
-        new.path, new.name = posixpath.split(desc["AidaPath"])
+                    sys.stderr.write("Unknown line format in '%s'\n" % line)
+        ## Apply special annotations as histo obj attributes
+        if desc.has_key("AidaPath"):
+            new.path, new.name = posixpath.split(desc["AidaPath"])
         if desc.has_key("Title"):
             new.title = desc["Title"]
         if desc.has_key("XLabel"):
@@ -353,16 +364,51 @@
 
 
     @classmethod
+    def fromFlat(cls, path):
+        """Load all histograms in file 'path' into a histo-path=>histo dict.
+
+        The keys of the dictionary are the full paths of the histogram, i.e.
+        AnalysisID/HistoID, a leading "/REF" is stripped from the keys.
+        """
+        runhistos = dict()
+        if path == "-":
+            f = sys.stdin
+        else:
+            f = open(path, "r")
+        fullpath = None
+        s = ""
+        for line in f:
+            if "BEGIN HISTOGRAM" in line:
+                fullpath = line.split('BEGIN HISTOGRAM', 1)[1].strip()
+                # TODO: Really? Here?
+                if fullpath.startswith("/REF"):
+                    fullpath = fullpath[4:]
+            if fullpath:
+                s += line
+                if "END HISTOGRAM" in line:
+                    runhistos[fullpath] = cls.fromFlatHisto(s)
+                    ## Reset for next histo
+                    fullpath = None
+                    s = ""
+        if f is not sys.stdin:
+            f.close()
+        return runhistos
+
+
+    @classmethod
     def fromAIDA(cls, path):
         """Load all histograms in file 'path' into a histo-path=>histo dict.
 
         The keys of the dictionary are the full paths of the histogram, i.e.
-        AnaylsisID/HistoID, a leading "/REF" is stripped from the keys.
+        AnalysisID/HistoID, a leading "/REF" is stripped from the keys.
+
+        TODO: /REF stripping should really happen in user code...
         """
         runhistos = dict()
         tree = ET.parse(path)
         for dps in tree.findall("dataPointSet"):
             fullpath = posixpath.join(dps.get("path"), dps.get("name"))
+            # TODO: Really? Here?
             if fullpath.startswith("/REF"):
                 fullpath = fullpath[4:]
             runhistos[fullpath] = cls.fromDPS(dps)


More information about the Rivet-svn mailing list