2
* Copyright (C) 2004-2007 The Chemistry Development Kit (CDK) project
4
* Contact: cdk-devel@lists.sourceforge.net
6
* This program is free software; you can redistribute it and/or
7
* modify it under the terms of the GNU Lesser General Public License
8
* as published by the Free Software Foundation; either version 2.1
9
* of the License, or (at your option) any later version.
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this program; if not, write to the Free Software
18
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21
package org.openscience.cdk.qsar.model.R;
23
import org.omegahat.R.Java.REvaluator;
24
import org.omegahat.R.Java.ROmegahatInterpreter;
25
import org.openscience.cdk.qsar.model.IModel;
26
import org.openscience.cdk.qsar.model.QSARModelException;
27
import org.openscience.cdk.tools.LoggingTool;
31
/** Base class for modeling classes that use R as the backend.
33
* This cannot be directly instantiated as its sole function is
34
* to initialize the SJava system and source R matcher/converter
35
* functions into the loaded R session. The class variable <code>revaluator</code>
36
* can be accessed from subclasses to make calls to the R session.
38
* Any class that builds models using R should be a subclass of this.
40
* An important feature to note when using the R backend is that the SJava
41
* initialization must be done only <b>once</b> in a Java thread. As a result
42
* when any model class based on RModel is instantiated the constructor for the
43
* super class (i.e., RModel) makes sure that SJava is not already initialized.
45
* By default the initialization uses a temporary file which is sourced in the
46
* R session. In some cases, such as web applications, temporary files might be
47
* problematic. In this case the R backend can be initialized via strings. To
48
* do this the application should specify <b>-DinitRFromString=true</b> on the command
49
* line. Note that this approach will be slightly slower compared to initialization
50
* via a temporary file.
52
* <b>NOTE</b>: For the R backend to work, ensure that R is correctly installed
53
* and that SJava is also installed, using the -c option. Finally, ensure
54
* that the R_HOME environment variable points to the R installation.
57
* @author Rajarshi Guha
58
* @cdk.require r-project
62
public abstract class RModel implements IModel {
64
private String modelName = null;
67
* The object that performs the calls to the R engine.
69
public static REvaluator revaluator = null;
71
* This object represents an instance of the R interpreter.
73
* Due to the design of R, only one interpreter can be instantiated in a given
74
* thread. That is, the underlying R engine is not thread safe. As a result
75
* care must be taken to have only one instance of the interpreter.
77
public static ROmegahatInterpreter interp = null;
80
* A boolean that indicates whether the R/Java subsystem has been initialized or not.
82
private static boolean doneInit = false;
83
private LoggingTool logger;
85
// Copies the bundled CDK-SJava R script from the classpath into a temporary
// file and asks the given evaluator to source() that file in the R session.
// NOTE(review): some lines of this method are not visible in this view
// (e.g. the try opener, the declaration of inputLine, the end of the copy
// loop and any stream closing) -- confirm against the complete file.
private void loadRFunctions(REvaluator evaluator) {
86
// Classpath location of the R script to load.
String scriptLocator = "org/openscience/cdk/qsar/model/data/cdkSJava.R";
88
// Temporary copy of the script; requested to be deleted when the JVM exits.
File scriptFile = File.createTempFile("XXXXX",".R");
89
scriptFile.deleteOnExit();
91
// Reader over the script resource, resolved through this class's class loader.
InputStreamReader reader = new InputStreamReader(
92
this.getClass().getClassLoader().getResourceAsStream(scriptLocator));
93
BufferedReader inFile = new BufferedReader(reader);
95
// Writer targeting the temporary file.
FileWriter outFile = new FileWriter(scriptFile);
96
BufferedWriter outBuffer = new BufferedWriter(outFile);
98
// Copy the resource to the temporary file one line at a time.
while ( (inputLine = inFile.readLine()) != null) {
99
outBuffer.write(inputLine,0,inputLine.length());
106
// Have R source the temporary file by its absolute path.
evaluator.voidEval("source(\""+scriptFile.getAbsolutePath()+"\")");
108
// Any failure is logged; no rethrow is visible in this view.
} catch (Exception exception) {
109
logger.error("Could not load CDK-SJava R script: ", scriptLocator);
110
logger.debug(exception);
114
// Loads a list of bundled R scripts from the classpath and evaluates each one
// in the R session as a string via eval(parse(text=...)), so no temporary
// file is written.
// NOTE(review): most of the scripts array, the try opener, the declaration of
// inputLine and the body of the read loop are not visible in this view --
// confirm against the complete file.
private void loadRFunctionsAsStrings(REvaluator evaluator) {
118
// Visible fragment of the script-name list; the remaining entries are not shown here.
"cnn_3.R", "cnn_4.R",
122
// Classpath directory that each script name is appended to.
String scriptPrefix = "org/openscience/cdk/qsar/model/data/";
123
for (int i = 0; i < scripts.length; i++) {
125
String scriptLocator = scriptPrefix + scripts[i];
127
// Reader over the current script resource, resolved through this class's class loader.
InputStreamReader reader = new InputStreamReader(
128
this.getClass().getClassLoader().getResourceAsStream(scriptLocator));
129
BufferedReader inFile = new BufferedReader(reader);
131
// Buffer whose contents are later passed to R as the script text.
StringWriter sw = new StringWriter();
133
// presumably each line read is appended to sw -- loop body not visible, verify
while ( (inputLine = inFile.readLine()) != null) {
139
// Parse and evaluate the buffered script text inside the R session.
evaluator.voidEval("eval(parse(text=\""+sw.toString()+"\"))");
141
// Any failure is logged; no rethrow is visible in this view.
} catch (Exception exception) {
142
logger.error("Could not load CDK-SJava R scripts: ", scriptLocator);
143
logger.debug(exception);
151
* Initializes SJava and R with the specified command line arguments (see R documentation).
153
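* This constructor will initialize the R session via a temporary file, unless the system property <code>initRFromString</code> is set to <code>true</code>, in which case the R scripts are loaded as strings.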
155
* @param args A String[] containing the command line parameters as elements
157
// Constructor taking explicit R command line arguments.
// Creates the logger, reads the initRFromString system property, creates the
// shared interpreter and evaluator, and loads the CDK R helper functions.
// NOTE(review): the lines that set useDisk, test doneInit and select between
// the branches below are not visible in this view -- confirm against the
// complete file.
public RModel(String[] args) {
158
logger = new LoggingTool(this);
160
// The system property "initRFromString" selects string-based initialization.
String initRFromString = System.getProperty("initRFromString");
161
boolean useDisk = true;
162
if (initRFromString != null && initRFromString.equals("true")) {
167
// Create the shared (static) interpreter and evaluator used by all RModel instances.
RModel.interp = new ROmegahatInterpreter(ROmegahatInterpreter.fixArgs(args), false);
168
RModel.revaluator = new REvaluator();
171
// Load the helper functions via a temporary file ...
loadRFunctions(RModel.revaluator);
172
logger.info("Initializing from disk");
174
// ... or by evaluating the script text directly.
loadRFunctionsAsStrings(RModel.revaluator);
175
logger.info("Initializing from strings");
179
logger.info("SJava initialized");
181
logger.info("SJava already initialized");
186
* Initializes SJava with the <i>--vanilla, -q, --slave</i> flags.
188
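* This constructor will initialize the R session via a temporary file, unless the system property <code>initRFromString</code> is set to <code>true</code>, in which case the R scripts are loaded as strings.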
191
// Body of the constructor that uses a fixed set of R command line flags.
// NOTE(review): the constructor's signature line and the lines that set
// useDisk, test doneInit and select between the branches below are not
// visible in this view -- confirm against the complete file.
// Fixed R command line flags used by this constructor.
String[] args = {"--vanilla","-q", "--slave"};
192
logger = new LoggingTool(this);
194
// The system property "initRFromString" selects string-based initialization.
String initRFromString = System.getProperty("initRFromString");
195
boolean useDisk = true;
196
if (initRFromString != null && initRFromString.equals("true")) {
201
// Create the shared (static) interpreter and evaluator used by all RModel instances.
RModel.interp = new ROmegahatInterpreter(ROmegahatInterpreter.fixArgs(args), false);
202
RModel.revaluator = new REvaluator();
205
// Load the helper functions via a temporary file ...
loadRFunctions(RModel.revaluator);
206
logger.info("Initializing from disk");
208
// ... or by evaluating the script text directly.
loadRFunctionsAsStrings(RModel.revaluator);
209
logger.info("Initializing from strings");
213
logger.info("SJava initialized");
215
logger.info("SJava already initialized");
221
* Saves a R model to disk.
223
* This function can be used to save models built in a session, and then loaded
224
* again in a different session.
226
* @param modelname The name of the model as returned by \code{getModelName}.
227
* @param filename The file to which the model should be saved
228
* @throws QSARModelException if the R session cannot save the model
231
public static void saveModel(String modelname, String filename) throws QSARModelException {
232
if (filename.equals("") || filename == null) {
233
filename = modelname+".rda";
235
//Boolean result = null;
237
revaluator.call("saveModel",
238
new Object[] { (Object)modelname, (Object)filename });
239
} catch (Exception e) {
240
System.out.println("Caught the exception");
241
throw new QSARModelException("Error saving model");
247
* Get the name of the model.
249
* This function returns the name of the variable that the actual
250
* model is stored in within the R session. In general this is
251
* not used for the end user. In the future this might be changed
252
* to a private method.
254
* @return A String containing the name of the R variable
257
public String getModelName() {
258
return(this.modelName);
262
* Set the name of the model.
264
* Ordinarily the user does not need to call this function as each model
265
* is assigned a unique ID at instantiation. However, if a user saves a model
266
* to disk and then later loads it, the loaded
267
* model may overwrite a model in that session. In this situation, this method
268
* can be used to assign a name to the model.
270
* @param newName The name of the model
276
public void setModelName(String newName) {
277
if (this.modelName != null && this.modelName.equals(newName)) return;
278
String oldName = this.modelName;
279
if (oldName != null) {
280
revaluator.voidEval("if ('"+oldName+"' %in% ls()) {"+newName+"<-"+oldName+";rm("+oldName+")}");
282
this.modelName = newName;
285
/**
 * Builds the model; the actual work is defined by each concrete subclass.
 *
 * @throws QSARModelException if the subclass fails to build the model
 */
public abstract void build() throws QSARModelException;
286
/**
 * Makes predictions with the model; the actual work is defined by each concrete subclass.
 *
 * @throws QSARModelException if the subclass fails to make the predictions
 */
public abstract void predict() throws QSARModelException;
289
* Specifies the parameters value.
291
* @param key A String representing the name of the parameter (corresponding to the
292
* name described in the R manpages)
293
* @param obj The value of the parameter
294
* @throws QSARModelException if the parameters are of the wrong type for the given modeling function
297
abstract public void setParameters(String key, Object obj) throws QSARModelException;
300
* Abstract method to handle loading R models.
302
* This method can be used to load a previously saved R model object. Since
303
* the user can save any arbitrary R object, checks must be made that the
304
* object being returned is an instance of one of the current modeling classes.
306
* This is best achieved by forcing each modeling class to write its own loader.
308
* @param fileName The file containing the R object to load
309
* @throws QSARModelException if the R session could not load the object or if the loaded model
310
* does not correspond to the class that it was loaded from
313
abstract public void loadModel(String fileName) throws QSARModelException;
315
* Abstract method to handle loading R models that were previously serialized.
317
* This method can be used to load a previously serialized R model object (usinging
318
* serialize()). Since
319
* the user can save any arbitrary R object, checks must be made that the
320
* object being returned is an instance of one of the current modeling classes.
321
* This is best achieved by forcing each modeling class to write its own loader.
324
* objects saved using serialize() do not have a name. As a result a name for the object must
325
* be specified when using this method.
327
* @param serializedModel A String containing the ASCII sreialized R object
328
* @param modelName The name of the model. (Within the R session, the model will be assigned to
329
* a variable of this name)
330
* @throws QSARModelException if the R session could not load the object or if the loaded model
331
* does not correspond to the class that it was loaded from
334
abstract public void loadModel(String serializedModel, String modelName) throws QSARModelException;