5
# Root directory of the input data. The paths below are built from it by
# plain string concatenation, so the trailing slash is required.
data_dir = '/home/brett/devel/bauble/data/'
6
# Path to csv/Family.txt under data_dir.
# NOTE(review): presumably the family table; it is not read in the visible
# part of this script -- confirm whether it is still needed.
families_file = data_dir + 'csv/Family.txt'
7
# Path to csv/Genera.txt under data_dir. The script reads it with csv.reader
# and uses column 2 as the lookup key and column 0 as the stored value.
genera_file = data_dir + 'csv/Genera.txt'
8
# Path to the plain-text checklist that the script reads line by line.
checklist_file = data_dir + 'old/belize_plants.txt'
9
# Header row: double-quoted, comma-separated column names.
# NOTE(review): the visible code prints `plantname_columns`, not
# `species_columns` -- confirm which name is intended.
species_columns='"genusID","sp","sp_author","infrasp_rank","infrasp","infrasp_author","sp_hybrid"'
11
# synonyms to use for the checklist genera
12
#generic_synonyms = {'Adenocalymna', Adenocalymma Mart. ex Meisn.
14
# a plant class with support for parsing a plant name string
15
# to construct the object
17
def __init__(self, genus=None, species=None, infrasp_rank=None,
18
infrasp=None, cv=None):
19
self.genus = genus or ""
20
self.species = species or ""
21
self.species_author = ""
22
self.infrasp_rank = infrasp_rank or ""
23
self.infrasp_author = ""
24
self.infrasp = infrasp or ""
26
# self.is_cv = '' # HACK for this file only
29
def match(self, species):
30
partsList = re.split("(?:subsp\.)+|(?:var\.)+", species)
31
speciesPart = partsList[0].strip()
33
# ** match species part
34
# look for " sp.", meaning it is not identified and should only
36
if speciesPart.find(" sp.") != -1:
37
self.genus = re.match("(?P<genus>[\w]*)\s+",
38
speciesPart).group("genus");
42
"""(?P<genus>[\w]*)\s+ # match the genus
43
(?P<hybrid>x?)\s? # hybrid sign
44
(?P<species>[\w-]*)\s? # match the species
46
speciesPart, re.VERBOSE)
48
self.genus = m.group("genus")
49
self.species = m.group("species")
50
self.hybrid = m.group("hybrid")
51
self.species_author = m.group("author")
53
# check for infrasp_rank
54
if species.find("subsp.") != -1:
55
self.infrasp_rank = "subsp."
56
elif species.find("var.") != -1:
57
self.infrasp_rank = "var."
59
if self.infrasp_rank is not "":
60
infraspPart = partsList[1].strip();
62
"""\A(?P<infrasp>[\w]*)\s?
63
(?P<infrasp_author>.*)""", infraspPart, re.VERBOSE)
64
self.infrasp = m.group("infrasp")
65
self.infrasp_author = m.group("infrasp_author")
67
# return a dict with key, value pairs for each member that has a value
68
# don't return key/values if the string is ""
69
#http://vsbabu.org/mt/archives/2003/02/13/joy_of_python_classes_and_dictionaries.html
70
#return dict([(k, v) for (k, v) in o.__dict__.items if not k.startswith('_'+o.__class__.__name__)])
71
# TODO: I'm not sure if this works
74
"""Return a dictionary from object that has public
78
#Joy: all the attributes in a class are already in __dict__
79
privatePrefix = "_" + self.__class__.__name__
80
for elem in self.__dict__.keys():
81
if elem.find(privatePrefix) == 0:
83
#We discard private variables, which are automatically
84
#named _ClassName__variablename, when we define it in
85
#the class as __variablename
86
elif self.__dict__[elem] != "":
88
#dict[elem] = self.__dict__[elem].encode("latin-1")
89
dict[elem] = self.__dict__[elem]
93
#dict[elem] = str(self.__dict__[elem]).encode("latin-1")
100
if self.species is not None:
101
s += " " + self.species
103
if self.infrasp_rank is not None:
104
s += " " + self.infrasp_rank + " " + self.infrasp
105
# if self.cv is not None:
110
# should be able to set field separator, field encloser, and some fields
111
# should be options like family, authors, hybrid, etc.
112
def csv(self, with_family=False):
114
print out in comma separated values format with the following fields:
115
genus, species, species_author, infrasp_rank, infrasp, infrasp_author, cv, hybrid
118
ft = "," # field terminated
119
enclosed = '"' # field enclosed
120
field = lambda x: '%s%s%s' % (enclosed, x, enclosed)
121
if with_family is True:
122
csvStr += field(self.family) + ft
123
if isinstance(self.genus, int):
124
csv += str(self.genus) + ft
125
else: csv+= field(self.genus) + ft
128
csv += field(self.species) + ft + \
129
field(self.species_author) + ft + \
130
field(self.infrasp_rank) + ft + \
131
field(self.infrasp) + ft + field(self.infrasp_author)
132
except UnicodeDecodeError, e:
133
print sys.stderr.write(e)
136
# there are no cultivars in the belize checklist
137
# if self.cv is not "":
138
# csv += ft + field(" cv. " + self.cv)
140
# csv += ft+ field('') + ft
141
csv += ft + field(self.hybrid)
142
#if self.hybrid is not '':
150
###################################################
153
# first parse the genera file (genera_file) for the genus name->id map,
154
# TODO: there is one genus in the file that has a duplicate name, find it
155
# and make sure that we don't use it in the checklist
160
for line in csv.reader(open(genera_file)):
161
gen_dict[line[2]] = line[0]
163
# print out a first line since it will be skipped
164
print plantname_columns
166
for line in open(checklist_file).readlines():
168
if line == "": continue
170
if line.find(" ") == -1 or line.find(":") != -1:
173
elif line.startswith('*'):
176
#continue # ******************* for now skip cultivated material
180
if p.genus not in gen_dict:
181
if p.genus not in missing:
182
missing[p.genus] = []
183
missing[p.genus].append(str(p))
184
elif p.species == "":
185
bad_lines.append(line)
186
continue # skip Prescottia sp. style names
188
p.genus = int(gen_dict[p.genus])
193
sys.stderr.write("******* could not find the following genera *******\n")
194
for gen, sp in missing.iteritems():
195
sys.stderr.write('%s: %s\n' % (gen, sp))
198
if len(bad_lines) > 0:
199
sys.stderr.write('******* could do anything with the following lines: *******\n')
201
sys.stderr.write(b + '\n')