[HepData-svn] r1334 - trunk/hepdata-migration/scripts

blackhole at projects.hepforge.org blackhole at projects.hepforge.org
Wed Jan 6 14:02:15 GMT 2010
Previous message: [HepData-svn] r1333 - in trunk: hepdata-model/src/main/java/cedar/hepdata/model hepdata-webapp/src/main/java/cedar/hepdata/formats hepdata-webapp/src/main/java/cedar/hepdata/webapp/pages
Next message: [HepData-svn] r1335 - trunk/hepdata-migration/src/main/java/cedar/hepdata/migration
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Author: whalley
Date: Wed Jan  6 14:02:15 2010
New Revision: 1334

Log:
latest working version of mkhepml

Modified:
   trunk/hepdata-migration/scripts/mkhepml

Modified: trunk/hepdata-migration/scripts/mkhepml
==============================================================================
--- trunk/hepdata-migration/scripts/mkhepml	Mon Jan  4 09:33:30 2010	(r1333)
+++ trunk/hepdata-migration/scripts/mkhepml	Wed Jan  6 14:02:15 2010	(r1334)
@@ -206,8 +206,11 @@
     centvalmean_re = re.compile(r"(" + pmnum_patt + r")\s*TO\s*(" + pmnum_patt
                                 + r")\s*\(\s*MEAN\s*=\s*(" + pmnum_patt + r")\s*\).*", re.I)
     ## e.g. 0.01 TO 0.02
+    lohival1x_re = re.compile(r"(" + pmnum_patt + r")\s*TO\s*(" + pmnum_patt + r")$", re.I)
+    ## e.g. 0.01 TO 0.02 (....
     lohival1_re = re.compile(r"(" + pmnum_patt + r")\s*TO\s*(" + pmnum_patt + r").*", re.I)
     ## e.g. 0.01 - 0.02
+    ## e.g. 0.01 - 0.02 (....   (trailing text after the range is also accepted)
     lohival2_re = re.compile(r"(" + pmnum_patt + r")\s*-\s*(" + pmnum_patt + r").*")
     ## e.g. 0.1 foo
     centvaletc_re = re.compile(r"(" + pmnum_patt + r")\s*\(.*\)")
@@ -218,6 +221,8 @@
     ## for  eg 8.4 (HW = +1.6,-1.6);
     hwasybin_re = re.compile(r"(" + pmnum_patt + r")\s*\(\s*HW\s*=\s*("+ pmnum_patt + r")\s*,\s*("+ pmnum_patt + r")\s*\)")
 
+    ## e.g. val +e1,-e2
+    centvalppmm_re = re.compile(r"(" + pmnum_patt + r")\s*\+("+ num_patt + r")\s*,\s*\-("+ num_patt + r")\s*") 
 
     ## For splitting property value strings
     splitname_re = re.compile(r"(.*)\s*IN\s*(.*)")
@@ -257,7 +262,8 @@
     paperdata = {}
     ## This is the new HepData ID code:
     papertag=hepml.find("paper")
-    papertag["hepdataId"] = options.PAPERID
+# NOTE: the next line must be disabled (commented out) unless doing a complete migration
+#    papertag["hepdataId"] = options.PAPERID
     ## Can't set these properly until we've read the legacy paper dump files:
     paperdata["spiresId"] = None
     paperdata["redId"] = None
@@ -418,6 +424,8 @@
                     refs[numrefs] = ref
                     reftag = Tag(hepml,"reference") 
                     reftag["description"] = ref
+                    reftag["type"] = typ
+                    reftag["date"] = year
                     papertag.insert(len(papertag), reftag)
 
 
@@ -529,7 +537,8 @@
                 dsinfo["legacy_dsid"]  = dsid
                 dsinfo["comment"]  = comment
                 dsinfo["location"] = location
-                dsinfo["plab"]     = [plab1, plab2] ## TODO: property
+                ## dsinfo["plab"]     = [plab1, plab2] ## TODO: property
+                dsinfo["plab"]     = plab1 +"," + plab2 
                 dsinfo["qsq"]      = [qsq1, qsq2] ## TODO: property
                 dsinfo["nu"]       = [nu1, nu2] ## TODO: property
                 dsinfo["k"]        = [k1, k2] ## TODO: property
@@ -562,6 +571,10 @@
                 commenttag = Tag(hepml, "comment")
                 commenttag.insert(0, escapeHTML("Location: " + dsdata[i][j]["location"]))
                 dstag.insert(len(dstag), commenttag)
+            if len(dsdata[i][j]["plab"]) > 0:
+                plabtag = Tag(hepml, "plab")
+                plabtag.insert(0, escapeHTML(dsdata[i][j]["plab"]))
+                dstag.insert(len(dstag), plabtag)
             papertag.insert(len(papertag), dstag)
             if options.DEBUG:
                 print "Adding dataset tag"
@@ -715,6 +728,7 @@
 
                 ## Parse value for error info
                 centralvalue, lowvalue, highvalue, reln = None, None, None, None
+		description = None
                 try:
                     if centval_re.match(valuestr):
                         m = centval_re.match(valuestr)
@@ -730,15 +744,34 @@
                         diff = float( m.group(2) )
                         lowvalue = centralvalue - diff
                         highvalue = centralvalue + diff
+                    elif centvalppmm_re.match(valuestr):
+                        m = centvalppmm_re.match(valuestr)
+                        centralvalue = float( m.group(1) )
+                        diff1 = float( m.group(2) )
+                        diff2 = float( m.group(3) )
+                        lowvalue = centralvalue - diff2
+                        highvalue = centralvalue + diff1
                     elif centvalmean_re.match(valuestr):
                         m = centvalmean_re.match(valuestr)
                         centralvalue = float( m.group(3) )
                         lowvalue = float( m.group(1) )
                         highvalue = float( m.group(2) )
+                    elif lohival1x_re.match(valuestr):
+                        m = lohival1x_re.match(valuestr)
+                        lowvalue = float( m.group(1) )
+                        highvalue = float( m.group(2) )
+                        lowhigh =  m.group(0)
+                        print "lowhival1 low  used  %s" % lowvalue
+                        print "lowhival1 high used  %s" % highvalue 
+                        print " valuestr is         %s" % valuestr 
                     elif lohival1_re.match(valuestr):
                         m = lohival1_re.match(valuestr)
                         lowvalue = float( m.group(1) )
                         highvalue = float( m.group(2) )
+                        lowhigh =  m.group(0)
+                        print "lowhival1 low  used  %s" % lowvalue
+                        print "lowhival1 high used  %s" % highvalue 
+                        print " valuestr is         %s" % valuestr 
                     elif lohival2_re.match(valuestr):
                         m = lohival2_re.match(valuestr)
                         lowvalue = float( m.group(1) )
@@ -777,10 +810,12 @@
                         m = centvaletc_re.match(valuestr)
                         centralvalue = float( m.group(1) )
                     else:
+		        description = valuestr
                         print "WARNING: x-val didn't match any known pattern: %s" % valuestr
                 except Exception, e:
-                    print "ERROR: problem during x-val parsing of '%s'" % valuestr
-                    raise e
+                   print "xxx" , centralvalue
+                   print "ERROR: problem during x-val parsing of '%s'" % valuestr
+                   raise e
                 if centralvalue is not None:
                     bintag["focus"] = centralvalue
                 if lowvalue is not None:
@@ -789,9 +824,11 @@
                     bintag["high"] = highvalue
                 if reln is not None:
                     bintag["relation"] = reln
+                if description is not None:
+                    bintag["description"] = description
 
                 ## If this bin has no position information, ignore it
-                if centralvalue is None and lowvalue is None and highvalue is None:
+                if centralvalue is None and lowvalue is None and highvalue is None and description is None:
                     continue
 
                 ## Handle comments
@@ -1101,6 +1138,54 @@
 
                     yproptag = Tag(hepml, "property")
                     yproptag["name"]  = name
+		    if unit == "GEV/NUCLEON":
+		        unit = "GEV"
+		    if unit == "GEV/NUCLEONV":
+		        unit = "GEV"
+		    if unit == "GEV/NUCLEUS":
+		        unit = "GEV"
+		    if unit == "DEGREES":
+		        unit = "DEG"
+		    if unit == "DEG.":
+		        unit = "DEG"
+		    if unit == "RADIANS":
+		        unit = "RAD"
+		    if unit == "MRAD.":
+		        unit = "MRAD"
+		    if unit == "GEV)":
+		        unit = "GEV"
+		    if unit == "GEV.":
+		        unit = "GEV"
+		    if unit == "GEC":
+		        unit = "GEV"
+		    if unit == "(GEV/C)**2":
+		        unit = "GEV**2"
+		    if unit == "GEV**2)":
+		        unit = "GEV**2"
+		    if unit == "MEV)":
+		        unit = "MEV"
+		    if unit == "1/FM**2":
+		        unit = "FM**-2"
+		    if unit == "1/PB":
+		        unit = "PB**-1"
+		    if unit == "SEC":
+		        unit = "S"
+		    if unit == "M/SEC":
+		        unit = "M/S"
+		    if unit == "MSR":
+		        unit = ""
+		    if unit == "ME V":
+		        unit = "MEV"
+		    if unit == "2TW)":
+		        unit = ""
+		    if unit == "G(NAME=LAMBDA)":
+		        unit = ""
+		    if unit == "G(NAME=DELTA(K)":
+		        unit = ""
+		    if unit == "G":
+		        unit = ""
+		    if unit == "/A":
+		        unit = ""
                     yproptag["unit"]  = unit
                     if loval is not None: 
                         yproptag["low"] = loval
@@ -1182,6 +1267,54 @@
                     for yaxistag in dstag.findAll("yaxis"):
                         yproptag = Tag(hepml,"property")
                         yproptag["name"] = name
+		        if unit == "GEV/NUCLEON":
+		            unit = "GEV"
+		        if unit == "GEV/NUCLEONV":
+		            unit = "GEV"
+		        if unit == "GEV/NUCLEUS":
+		            unit = "GEV"
+		        if unit == "DEGREES":
+		            unit = "DEG"
+		        if unit == "DEG.":
+		            unit = "DEG"
+		        if unit == "RADIANS":
+		            unit = "RAD"
+		        if unit == "MRAD.":
+		            unit = "MRAD"
+		        if unit == "GEV)":
+		            unit = "GEV"
+		        if unit == "GEV.":
+		            unit = "GEV"
+		        if unit == "GEC":
+		            unit = "GEV"
+		        if unit == "(GEV/C)**2":
+		            unit = "GEV**2"
+		        if unit == "GEV**2)":
+		            unit = "GEV**2"
+		        if unit == "MEV)":
+		            unit = "MEV"
+		        if unit == "1/FM**2":
+		            unit = "FM**-2"
+		        if unit == "1/PB":
+		            unit = "PB**-1"
+		        if unit == "SEC":
+		            unit = "S"
+		        if unit == "M/SEC":
+		            unit = "M/S"
+		        if unit == "MSR":
+		            unit = ""
+		        if unit == "ME V":
+		            unit = "MEV"
+		        if unit == "2TW)":
+		            unit = ""
+		        if unit == "G(NAME=LAMBDA)":
+		            unit = ""
+		        if unit == "G(NAME=DELTA(K)":
+		            unit = ""
+		        if unit == "G":
+		            unit = ""
+		        if unit == "/A":
+		            unit = ""
                         yproptag["unit"] = unit
                         yproptag["error"] = error
                         if loval is not None: 
@@ -1248,9 +1381,11 @@
                 pointtagid = "pt-" + paperid + "-" + dsid + "-" + yaxisid + "-" + pointid
                 pointtag["id"] = pointtagid
                 pointtag["pointId"] = pointid
-                if valuestr is not "":
+                if valuestr != "":
                     pointtag["value"] = float(valuestr)
-
+                else:
+                    pointtag["value"] = "0"
+                
                 ## Handle relation inequalities etc.
                 if relstr == "=":
                     pointtag["relation"] = "eq"
@@ -1309,13 +1444,23 @@
                 pointid  = vals[3].strip()
                 errid    = vals[4].strip()
                 valup    = vals[5].strip().replace(' ','')
-                if not valup:
+                valdn    = vals[6].strip().replace(' ','')
+                if valup.startswith("-") and valdn.startswith("+"):
+                    print "swopping valup/dn"
+                    valup    = vals[6].strip().replace(' ','')
+                    valdn    = vals[5].strip().replace(' ','')
+                if not valup or valup == "?":
                     valup = "0.0"
                     print "Missing point error valup entry for ", paperid, dsid, yaxisid, pointid, ": using 0.0"
-                valdn = vals[6].strip().replace(' ','')
-                if not valdn:
+                if valup == "+LIM":
+                    valup = "0.0"
+                    print "Limiting point error valup entry for ", paperid, dsid, yaxisid, pointid, ": using 0.0"
+                if not valdn or valdn == "?":
                     valdn = valup
                     print "Missing point error valdn entry for ", paperid, dsid, yaxisid, pointid, ": using valup"
+                if valdn == "-LIM":
+                    valdn = "0.0"
+                    print "Limiting point error valdn entry for ", paperid, dsid, yaxisid, pointid, ": using 0.0"
                 type     = vals[7].strip()
                 if type == "":
                     type = "UNKNOWN";
@@ -1614,43 +1759,43 @@
 
         ## Add extra point errors (from sysmatch files)
 
-        ## fix to deal with duplicate entries in extra_point_errors
-        temp_extra_point_errors = extra_point_errors
-    #    del extra_point_errors;
-        extra_point_errors = []
-        o_errtag = None
-        for errtag in temp_extra_point_errors:
-            if(errtag != o_errtag):
-                extra_point_errors.append(errtag)
-                print str(errtag)
-            o_errtag = errtag
-
-        ## back on track again....
-        for errtag in extra_point_errors:
-            paperid = errtag["paperId"]
-            dsid    = errtag["dsId"]
-            yaxisid = errtag["yaxisId"]
-            pointid = errtag["pointId"]
-            ## Find the y-axis. Build one if it doesn't already exist
-            print "INFO: Adding an extra point error to y-axis %s-%s-%s-%s, point " % (paperid, dsid, yaxisid, pointid)
-            yaxistag = forceGetYAxisTag(papertag, paperid, dsid, yaxisid)
-            ## Depending on hierarchy choice, we might have to make a pointerrors tag
-            if options.STRICT_HIER:
-                pointtagid = "pt-" + paperid + "-" + dsid + "-" + yaxisid + "-" + pointid
-                pointtag = yaxistag.find("point", id=pointtagid)
-                del errtag["pointId"]
-                pointtag.insert(len(pointtag), errtag)
-            else:
-                pointerrstag = yaxistag.find("pointerrors")
-                if not pointerrstag:
-                    pointerrstag = Tag(hepml, "pointerrors")
-                    yaxistag.insert(len(yaxistag), pointerrstag)
-                try:
-                    pointerrstag.insert(len(pointerrstag), errtag)
-    ##              pointerrstag.insert(len(pointerrstag), errtag)
-                except:
-                    print 'WARNING: must be a duplicate'
-
+###        ## fix to deal with duplicate entries in extra_point_errors
+###        temp_extra_point_errors = extra_point_errors
+###    #    del extra_point_errors;
+###        extra_point_errors = []
+###        o_errtag = None
+###        for errtag in temp_extra_point_errors:
+###            if(errtag != o_errtag):
+###                extra_point_errors.append(errtag)
+###                print str(errtag)
+###            o_errtag = errtag
+###
+###        ## back on track again....
+###        for errtag in extra_point_errors:
+###            paperid = errtag["paperId"]
+###            dsid    = errtag["dsId"]
+###            yaxisid = errtag["yaxisId"]
+###            pointid = errtag["pointId"]
+###            ## Find the y-axis. Build one if it doesn't already exist
+###            print "INFO: Adding an extra point error to y-axis %s-%s-%s-%s, point " % (paperid, dsid, yaxisid, pointid)
+###            yaxistag = forceGetYAxisTag(papertag, paperid, dsid, yaxisid)
+###            ## Depending on hierarchy choice, we might have to make a pointerrors tag
+###            if options.STRICT_HIER:
+###                pointtagid = "pt-" + paperid + "-" + dsid + "-" + yaxisid + "-" + pointid
+###                pointtag = yaxistag.find("point", id=pointtagid)
+###                del errtag["pointId"]
+###                pointtag.insert(len(pointtag), errtag)
+###            else:
+###                pointerrstag = yaxistag.find("pointerrors")
+###                if not pointerrstag:
+###                    pointerrstag = Tag(hepml, "pointerrors")
+###                    yaxistag.insert(len(yaxistag), pointerrstag)
+###                try:
+###                    pointerrstag.insert(len(pointerrstag), errtag)
+###    ##              pointerrstag.insert(len(pointerrstag), errtag)
+###                except:
+###                    print 'WARNING: must be a duplicate'
+###
 
 
     if options.TIME :
Previous message: [HepData-svn] r1333 - in trunk: hepdata-model/src/main/java/cedar/hepdata/model hepdata-webapp/src/main/java/cedar/hepdata/formats hepdata-webapp/src/main/java/cedar/hepdata/webapp/pages
Next message: [HepData-svn] r1335 - trunk/hepdata-migration/src/main/java/cedar/hepdata/migration
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the HepData-svn mailing list