5
## Last modified: 2013-03-08
7
## Python script for parsing NWChem output for TDDFT/vspec excitation
8
## energies, and optionally Lorentzian broadenening the spectra. For
17
from optparse import OptionParser
24
"""Check version and ensure new print and string formatting is
25
allowed. Raises and exception if not satisfied."""
27
if sys.version_info < (2, 6):
28
raise Exception("This script requires python >= 2.6")
31
newstring = "oldstring {v}".format(v=3.14)
33
raise Exception("This script requires string.format()")
37
return (1.0 / 27.2114) * e_ev
45
return 27.2114 * 2.0 * 2.99792 * 2.41888 * 3.14159265359 / e_ev
48
def determine_data_type ():
49
"""Parses stdin to see what data type, then rewinds stdin.
50
Returns 'vspec', 'tddft', or raises and Exception if neither
51
found. It choses based on the first tag found in the input, so
52
files with multiple data will only find the 1st. To extract the
53
2nd, manually specify the data format via the command line args."""
55
tag_tddft = "NWChem TDDFT Module"
56
tag_vspec = "DFT Virtual Spectrum"
58
lines = sys.stdin.readlines()
64
elif tag_vspec in line:
68
raise Exception ("Failed to determine data format, please specify manually.")
71
def parse_input_vspec (opts):
72
"""Parses input from vspec and returns excitation energies in the
73
form [energy, f], in eV and atomic units units, respectively."""
75
lines = sys.stdin.readlines ()
87
raise Exception ("Failed to parse <START> tag and number: {0}".format(l))
100
line_split = l.strip().split()
101
n = int (line_split[0])
102
occ = int (line_split[1]) #not used
103
virtual = int (line_split[2]) #not used
104
energy_ev = float (line_split[3])
105
osc = float (line_split[7])
107
raise Exception ("Failed to parse data line: {0}".format(l))
109
iexcite = iexcite + 1
112
raise Exception ("Expected excitation number {0}, found {1}".format(iexcite, n))
116
print ("{0} Warning: Ignored negative vpsec excitation: {1} eV, {2}".format(opts.cchar, energy_ev, osc))
119
sys.stderr.write ("Warning: Ignored negative vpsec excitation: {0} eV, {1}\n".format(energy_ev, osc))
121
roots.append ([energy_ev, osc])
124
# if not inside_data:
125
# raise Exception ("Failed to find <START> tag")
127
if iexcite != nexcite:
128
print ("{0} Warning: Expected {1} excitations, found {2}".format(opts.cchar, nexcite, iexcite))
131
sys.stderr.write ("Warning: Expected {0} excitations, found {1}\n".format(nexcite,iexcite))
135
sys.stderr.write ("{0}: Found {1} vspec excitations\n".format(pname, len(roots)))
141
def parse_input_evals (opts):
142
"""Parses input for eigenvalues and return as a list"""
144
start_tag = "DFT Final Molecular Orbital Analysis"
145
end_tag = "Task times cpu"
149
lines = sys.stdin.readlines ()
160
if start_tag in line:
166
if inside and "Vector" in line and "Occ" in line:
167
line_strip = line.strip()
170
tagloc = line_strip.rfind("E=")
171
evalue_str = line_strip[tagloc+2:tagloc+15].replace("D", "E") # after E= ; replace D with E
172
evalue = float(evalue_str)
174
raise Exception ("Failed to parse eigenvalue: {0}".format(line_strip))
176
eval_ev = au2ev (evalue)
177
evals.append(eval_ev) #store eigenvalue in eV
181
sys.stderr.write ("{0}: Found {1} eigenvalues\n".format(pname, len(evals)))
186
def bin_evals (opts, evals):
187
"""Take eigenvalues and bins them, and return [energy, N], where N
188
is the number of eigenvalues in the energy bin centered around
191
## they should be sorted but let's make sure
192
evals_sort = sorted(evals)
195
emax = evals_sort[-1]
197
de = (emax - emin) / opts.nbin
199
#=== XXX HARDCODE ===
201
# opts.nbin = int((emax - emin)/de) + 1
205
for ie in range(opts.nbin+1):
206
ecenter = emin + ie*de
207
eleft = ecenter - 0.5*de
208
eright = ecenter + 0.5*de
211
for val in evals_sort:
212
if val >= eleft and val <= eright:
215
dos_raw.append ([ecenter, count])
218
## check that total sum is number of eigenvalues
223
if ntot != len (evals):
224
raise Exception ("Inconsistent integrated DOS and number of eigenvalues: {0} vs {1}".format(ntot, len(evals)))
229
def parse_input_tddft (opts):
230
"""Parses input for singlet TDDFT excitation energies.
231
Returns excitation energies in the form [energy, f], in eV and
232
atomic units units, respectively."""
234
start_tag = "Convergence criterion met"
235
end_tag = "Excited state energy"
236
max_osc_search = 10 #max number of lines after root energy to look for oscillator strength
239
inside = False #true when we are inside output block
241
lines = sys.stdin.readlines()
254
if start_tag in line:
260
if inside and "Root" in line and "eV" in line:
262
line_strip = line.strip()
265
## Note, I would ideally .split() the line directly, but you
266
## can have cases like Root356 (for three digit root numbers)
267
## so I have to do it this ugly way...
270
line_start = line_strip[0:4] # should contain RootXXX
271
line_data = line_strip[7:].split()
272
line_n = line_strip[4:7] #contains integer root number
273
line_e = line_data[-2] # should contain excitation energy
274
line_ev_tag = line_data[-1] # should contain "eV"
276
raise Exception ("Failed to parse data line for root: {0}".format(line_strip))
278
# print (line_start, line_n, line_data) #debugging
280
if line_start == "Root" and line_ev_tag == "eV":
283
energy_ev = float(line_e)
285
raise Exception ("Failed to convert root values: {0}".format(line_strip))
287
raise Exception ("Unexpected format for root: {0}".format(line_strip))
291
## Now look for oscillator strength, which will be a few
292
## lines down (though the exact position may vary it seems).
296
ioscline = ioscline + 1
297
if ioscline >= max_osc_search:
298
raise Exception ("Failed to find oscillator strength after looking {0} lines.".format(ioscline))
300
oscline = lines[iline + ioscline].strip()
302
if "Dipole Oscillator Strength" in oscline:
304
osc_str = oscline.split()
305
osc = float (osc_str[3])
307
raise Exception ("Failed to convert oscillator strength: {0}".format(oscline))
311
## do some final checks, then append to data
313
raise Exception ("Invalid negative energy: {0}".format(energy_ev))
316
raise Exception ("Invalid negative oscillator strength: {0}".format(osc))
318
roots.append([energy_ev, osc])
324
raise Exception ("Failed to find any TDDFT roots")
327
print ("{0} Successfully parsed {1} TDDFT singlet excitations".format(opts.cchar,nroots))
331
sys.stderr.write ("{0}: Found {1} TDDFT excitations\n".format(pname, nroots))
337
def make_energy_list (opts, roots):
338
"""Computes the list of spectrum energies, and potentially adjusts
341
epad = 20.0*opts.width
343
emin = roots[0][0] - epad
345
# if emin < opts.width:
348
emax = roots[-1][0] + epad
349
de = (emax - emin) / opts.npoints
351
## Use width of at least two grid points
352
if opts.width < 2*de:
354
print ("{0} Warning: Forced broadening to be {1} eV".format(opts.cchar, opts.width))
357
sys.stderr.write ("Warning: Forced broadening to be {0} eV\n".format(opts.width))
359
# opts.width = max (opts.width, 2*de)
361
eout = [ emin + ie*de for ie in range(opts.npoints) ]
366
# def lorentzian_broaden (opts, roots):
367
# """Broadens raw roots into spectrum and returns the result."""
370
# cutoff = 15.0*opts.width
373
# ## multiply by 0.5 as FWHM was supplied by gamma in lorenzian is
376
# gamma = 0.5 * opts.width
377
# gamma_sqrd = gamma*gamma
381
# ## L(w; w0, gamma) = gamma/pi * 1 / [(w-w0)^2 + gamma^2]
383
# prefac = gamma/3.14159265359*de
386
# # spectrum = [ [emin + ie*de, 0] for ie in range(opts.npoints)]
388
# # for point in spectrum:
390
# # for root in roots:
391
# # xx0 = point[0] - root[0]
393
# # if abs(xx0) <= cutoff:
394
# # stot += prefac * root[1] / ( xx0*xx0 + gamma_sqrd) #Lorentzian
399
# # npoints_made = len(spectrum)
400
# # if npoints_made != opts.npoints:
401
# # raise Exception ("Spectrum should have {0} points, instead has {1}".format(opts.npoints, npoints_made))
404
# # print ("{0} Lorentzian broadened roots with width {1} eV".format(opts.cchar, opts.width))
405
# # print ("{0} Created spectrum with {1} points".format(opts.cchar, npoints_made))
411
## Faster way--use a generator
412
def gen_spectrum (opts, energies, roots):
413
"""Generator for making Lorentzian broadenend spectrum."""
416
# cutoff = 15.0*opts.width
417
cutoff = 20.0*opts.width
418
# cutoff = 9999999.0*opts.width
422
## L(w; w0, gamma) = gamma/pi * 1 / [(w-w0)^2 + gamma^2]
425
## multiply by 0.5 as FWHM was supplied by gamma in
426
## lorenzian is actually HWHM.
428
gamma = 0.5 * opts.width
429
gamma_sqrd = gamma*gamma
431
de = (energies[-1] - energies[0]) / (len(energies)-1)
432
prefac = gamma/3.14159265359*de
436
## used for verbose output (ie see progress for very large runs)
438
# checkpt_energies = []
440
# for ie in range(ne):
442
# checkpt_energies.append(energies[ie])
445
for energy in energies:
448
# if energy in checkpt_energies:
449
# sys.stderr.write ("XXX\n")
454
xx0 = energy - root[0]
456
if abs(xx0) <= cutoff:
457
stot += root[1] / ( xx0*xx0 + gamma_sqrd) #Lorentzian
459
yield [energy, stot*prefac]
465
def dump_header (opts):
466
"""Print header to stdout"""
469
print ("{0} ================================".format(opts.cchar))
470
print ("{0} NWChem spectrum parser ver {1}".format(opts.cchar,ver))
471
print ("{0} ================================".format(opts.cchar))
472
print ("{0} ".format(opts.cchar))
473
print ("{0} Parser runtime options: {1}".format(opts.cchar,opts))
474
print ("{0}".format(opts.cchar))
477
def dump_data (opts, roots):
478
"""Dumps output to stdout. This works for either lists of raw roots
479
or broadened spectra."""
482
sys.stderr.write ("{0}: Dumping data to stdout ... \n".format(pname))
484
if opts.units == "ev":
486
print ("{0}".format(opts.cchar))
487
print ("{c}{s1}{delim}{s2}".format(c=opts.cchar, s1=" Energy [eV] ", delim=opts.delim, s2=" Abs. [au] "))
488
print ("{0}----------------------------------".format(opts.cchar))
491
print ("{energy:.10e}{delim}{osc:.10e}".format(energy=root[0], delim=opts.delim, osc=root[1]))
494
elif opts.units == "au":
496
print ("{0}".format(opts.cchar))
497
print ("{c}{s1}{delim}{s2}".format(c=opts.cchar, s1=" Energy [au] ", delim=opts.delim, s2=" Abs. [au] "))
498
print ("{0}----------------------------------".format(opts.cchar))
501
eout = ev2au (root[0])
502
print ("{energy:.10e}{delim}{osc:.10e}".format(energy=eout, delim=opts.delim, osc=root[1]))
504
elif opts.units == "nm":
506
print ("{0}".format(opts.cchar))
507
print ("{c}{s1}{delim}{s2}".format(c=opts.cchar, s1=" Wavelen. [nm] ", delim=opts.delim, s2=" Abs. [au] "))
508
print ("{0}----------------------------------".format(opts.cchar))
510
# roots.reverse () #reorder so we output in increasing wavelength
513
eout = ev2nm (root[0])
514
print ("{energy:.10e}{delim}{osc:.10e}".format(energy=eout, delim=opts.delim, osc=root[1]))
517
raise Exception ("Invalid unit: {0}".format(opts.units))
520
def preprocess_check_opts (opts):
521
"""Check options and replaces with sane values if needed, stores
522
all opts as purely lowercase."""
524
opts.units = opts.units.lower()
525
opts.datafmt = opts.datafmt.lower()
527
if opts.units != "nm" and opts.units != "ev" and opts.units != "au":
528
raise Exception ("Invalid unit type: {0}".format(opts.units))
530
if opts.datafmt != "tddft" and opts.datafmt != "vspec" and opts.datafmt != "auto" and opts.datafmt != "dos":
531
raise Exception ("Invalid data format: {0}".format(opts.datafmt))
533
if opts.npoints < 100:
534
raise Exception ("Increase number of points to at least 100 (you asked for {0})".format(opts.npoints))
537
raise Exception ("Peak width must be positive (you supplied {0})".format(opts.width))
543
## Parse command line options. Note we later make all lowercase,
544
## so from the user's point of view they are case-insensitive.
546
usage = "%prog [options]\n\n"
547
desc = "Reads NWChem output from stdin, parses for the linear response TDDFT or DFT vspec excitations, and prints the absorption spectrum to stdout. It will optionally broaden peaks using a Lorentzian with FWHM of at least two energy/wavelength spacings. By default, it will automatically determine data format (tddft or vspec) and generate a broadened spectrum in eV."
548
desc_wrap = textwrap.wrap (desc,80)
552
example = 'Create absorption spectrum in nm named "spectrum.dat" from the NWChem output file "water.nwo" named spectrum.dat with peaks broadened by 0.3 eV and 5000 points in the spectrum.'
553
example_wrap = textwrap.wrap (example,80)
554
usage += "\nExample:\n\n\t"+"nw_spectrum -b0.3 -p5000 -wnm < water.nwo > spectrum.dat\n\n"
555
for s in example_wrap:
559
parser = OptionParser(usage=usage)
560
parser.add_option("-f", "--format", type="string", dest="datafmt",
561
help="data file format: auto (default), tddft, vspec, dos", metavar="FMT")
562
parser.add_option("-b", "--broad", type="float", dest="width",
563
help="broaden peaks (FWHM) by WID eV (default 0.1 eV)", metavar="WID")
564
parser.add_option("-n", "--nbin", type="int", dest="nbin",
565
help="number of eigenvalue bins for DOS calc (default 20)", metavar="NUM")
566
parser.add_option("-p", "--points", type="int", dest="npoints",
567
help="create a spectrum with NUM points (default 2000)", metavar="NUM")
568
parser.add_option("-w", "--units", type="string", dest="units",
569
help="units for frequency: eV (default), au, nm", metavar="UNT")
570
parser.add_option("-d", "--delim", type="string", dest="delim",
571
help="use STR as output separator (four spaces default)", metavar="STR")
572
parser.add_option("-x", "--extract", action="store_false", dest="makespec",
573
help="extract unbroadened roots; do not make spectrum")
574
parser.add_option("-C", "--clean", action="store_false", dest="header",
575
help="clean output; data only, no header or comments")
576
parser.add_option("-c", "--comment", type="string", dest="cchar",
577
help="comment character for output ('#' default)", metavar="CHA")
578
parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
579
help="echo warnings and progress to stderr")
582
parser.set_defaults(width = 0.1, npoints = 2000, units="eV", datafmt="auto", delim=" ", makespec=True, header=True, cchar="#", nbin=20, verbose=False)
583
(opts, args) = parser.parse_args()
585
preprocess_check_opts(opts)
591
if opts.datafmt == "auto":
592
opts.datafmt = determine_data_type ()
593
if opts.datafmt == "tddft":
594
print ("{0} The input appears to contain TDDFT data.".format(opts.cchar))
595
elif opts.datafmt == "vspec":
596
print ("{0} The input appears to contain vspec data.".format(opts.cchar))
598
raise Exception ("Invalid data format: {0}".format(opts.datafmt))
602
if opts.datafmt == "tddft":
603
roots = parse_input_tddft (opts)
604
elif opts.datafmt == "vspec":
605
roots = parse_input_vspec (opts)
606
elif opts.datafmt == "dos":
607
evals = parse_input_evals (opts)
608
roots = bin_evals (opts, evals)
610
raise Exception ("Invalid data format supplied: {0}".format(opts.datafmt))
613
## make broadened spectrum if desired
615
energies = make_energy_list (opts, roots)
616
spectrum = gen_spectrum (opts, energies, roots)
619
sys.stderr.write ("{0}: Initialized spectrum [{1: .3f} : {2: .3f}] ({3} points)\n".format(pname, energies[0], energies[-1], len(energies)))
622
print ("{0} Roots were Lorentzian broadened with width {1} eV ".format(opts.cchar, opts.width))
623
print ("{0} Spectrum generated with {1} points.".format(opts.cchar, opts.npoints))
628
print ("{0} Roots not broadened".format(opts.cchar))
629
print ("{0} No spectrum generated: output is list of raw excitations".format(opts.cchar))
632
## finally, dump data to stdout
633
dump_data (opts, spectrum)
636
sys.stderr.write ("{0}: Done\n".format(pname))
639
if __name__ == "__main__":