~ubuntu-branches/ubuntu/saucy/fastqc/saucy-proposed

« back to all changes in this revision

Viewing changes to uk/ac/babraham/FastQC/Modules/OverRepresentedSeqs.java

  • Committer: Package Import Robot
  • Author(s): Andreas Tille
  • Date: 2012-11-20 13:38:32 UTC
  • Revision ID: package-import@ubuntu.com-20121120133832-psohzlsak64g7bdy
Tags: upstream-0.10.1+dfsg
Import upstream version 0.10.1+dfsg

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/**
 
2
 * Copyright Copyright 2010-12 Simon Andrews
 
3
 *
 
4
 *    This file is part of FastQC.
 
5
 *
 
6
 *    FastQC is free software; you can redistribute it and/or modify
 
7
 *    it under the terms of the GNU General Public License as published by
 
8
 *    the Free Software Foundation; either version 3 of the License, or
 
9
 *    (at your option) any later version.
 
10
 *
 
11
 *    FastQC is distributed in the hope that it will be useful,
 
12
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 
13
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
14
 *    GNU General Public License for more details.
 
15
 *
 
16
 *    You should have received a copy of the GNU General Public License
 
17
 *    along with FastQC; if not, write to the Free Software
 
18
 *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
19
 */
 
20
package uk.ac.babraham.FastQC.Modules;
 
21
 
 
22
import java.awt.BorderLayout;
 
23
import java.util.ArrayList;
 
24
import java.util.Arrays;
 
25
import java.util.HashMap;
 
26
import java.util.Iterator;
 
27
import java.util.List;
 
28
 
 
29
import javax.swing.JLabel;
 
30
import javax.swing.JPanel;
 
31
import javax.swing.JScrollPane;
 
32
import javax.swing.JTable;
 
33
import javax.swing.table.AbstractTableModel;
 
34
import javax.swing.table.TableModel;
 
35
 
 
36
import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
 
37
import uk.ac.babraham.FastQC.Sequence.Sequence;
 
38
import uk.ac.babraham.FastQC.Sequence.Contaminant.ContaminantHit;
 
39
import uk.ac.babraham.FastQC.Sequence.Contaminant.ContaminentFinder;
 
40
 
 
41
public class OverRepresentedSeqs implements QCModule {
 
42
 
 
43
        protected HashMap<String, Integer>sequences = new HashMap<String, Integer>();
 
44
        protected int count = 0;
 
45
        private OverrepresentedSeq [] overrepresntedSeqs = null;
 
46
        private boolean calculated = false;
 
47
        private boolean frozen = false;
 
48
        private DuplicationLevel duplicationModule;
 
49
        
 
50
        // This is the number of different sequences we want to track
 
51
        private final int OBSERVATION_CUTOFF = 200000;
 
52
        // This is a count of how many unique sequences we've seen so far
 
53
        // so we know when to stop adding them.
 
54
        private int uniqueSequenceCount = 0;
 
55
        // This was the total count at the point at which we saw our total
 
56
        // number of unique sequences, so we know what to correct by when
 
57
        // extrapolating to the whole file
 
58
        protected int countAtUniqueLimit = 0;
 
59
        
 
60
        
 
61
        public OverRepresentedSeqs () {
 
62
                duplicationModule = new DuplicationLevel(this);
 
63
        }
 
64
        
 
65
        public String description() {
 
66
                return "Identifies sequences which are overrepresented in the set";
 
67
        }
 
68
        
 
69
        public boolean ignoreFilteredSequences() {
 
70
                return true;
 
71
        }
 
72
        
 
73
        public DuplicationLevel duplicationLevelModule () {
 
74
                return duplicationModule;
 
75
        }
 
76
 
 
77
        public JPanel getResultsPanel() {
 
78
                JPanel returnPanel = new JPanel();
 
79
                returnPanel.setLayout(new BorderLayout());
 
80
                returnPanel.add(new JLabel("Overrepresented sequences",JLabel.CENTER),BorderLayout.NORTH);
 
81
                
 
82
                if (!calculated) getOverrepresentedSeqs();
 
83
                
 
84
                if (overrepresntedSeqs.length > 0) {
 
85
                        TableModel model = new ResultsTable(overrepresntedSeqs);
 
86
                        JTable table = new JTable(model);
 
87
                        table.setCellSelectionEnabled(true);
 
88
                        returnPanel.add(new JScrollPane(table),BorderLayout.CENTER);
 
89
                }
 
90
                else {
 
91
                        returnPanel.add(new JLabel("There are no overrepresented sequences",JLabel.CENTER),BorderLayout.CENTER);
 
92
                }
 
93
                
 
94
                return returnPanel;
 
95
        
 
96
        }
 
97
        
 
98
        public DuplicationLevel getDuplicationLevelModule () {
 
99
                return duplicationModule;
 
100
        }
 
101
        private synchronized void getOverrepresentedSeqs () {
 
102
 
 
103
                // If the duplication module hasn't already done
 
104
                // its calculation it needs to do it now before
 
105
                // we stomp all over the data
 
106
                duplicationModule.calculateLevels();
 
107
                
 
108
                Iterator<String> s = sequences.keySet().iterator();
 
109
                List<OverrepresentedSeq>keepers = new ArrayList<OverrepresentedSeq>();
 
110
                
 
111
                while (s.hasNext()) {
 
112
                        String seq = s.next();
 
113
                        double percentage = ((double)sequences.get(seq)/count)*100;
 
114
                        if (percentage > 0.1) {
 
115
                                OverrepresentedSeq os = new OverrepresentedSeq(seq, sequences.get(seq), percentage);
 
116
                                keepers.add(os);
 
117
                        }
 
118
                }
 
119
                
 
120
                overrepresntedSeqs = keepers.toArray(new OverrepresentedSeq[0]);
 
121
                Arrays.sort(overrepresntedSeqs);
 
122
                calculated  = true;
 
123
                sequences.clear();
 
124
                
 
125
        }
 
126
        
 
127
        public void reset () {
 
128
                count = 0;
 
129
                sequences.clear();
 
130
        }
 
131
 
 
132
        public String name() {
 
133
                return "Overrepresented sequences";
 
134
        }
 
135
 
 
136
        public void processSequence(Sequence sequence) {
 
137
                
 
138
                calculated = false;
 
139
                
 
140
                ++count;
 
141
                
 
142
                // Since we rely on identity to match sequences we can't trust really long
 
143
                // sequences, so anything over 75bp gets truncated to 50bp.
 
144
                String seq = sequence.getSequence();
 
145
                if (seq.length() > 75) {
 
146
                        seq = new String(seq.substring(0, 50));
 
147
                }
 
148
                                
 
149
                if (sequences.containsKey(seq)) {
 
150
                        sequences.put(seq, sequences.get(seq)+1);
 
151
                }
 
152
                else {
 
153
                        if (! frozen) {
 
154
                                sequences.put(seq, 1);
 
155
                                ++uniqueSequenceCount;
 
156
                                countAtUniqueLimit = count;
 
157
                                if (uniqueSequenceCount == OBSERVATION_CUTOFF) {
 
158
                                        frozen = true;
 
159
                                }
 
160
 
 
161
                        }
 
162
                }               
 
163
        }
 
164
        
 
165
        private class ResultsTable extends AbstractTableModel {
 
166
                
 
167
                private OverrepresentedSeq [] seqs;
 
168
                
 
169
                public ResultsTable (OverrepresentedSeq [] seqs) {
 
170
                        this.seqs = seqs;
 
171
                }
 
172
                
 
173
                
 
174
                // Sequence - Count - Percentage
 
175
                public int getColumnCount() {
 
176
                        return 4;
 
177
                }
 
178
 
 
179
                public int getRowCount() {
 
180
                        return seqs.length;
 
181
                }
 
182
 
 
183
                public Object getValueAt(int rowIndex, int columnIndex) {
 
184
                        switch (columnIndex) {
 
185
                                case 0: return seqs[rowIndex].seq();
 
186
                                case 1: return seqs[rowIndex].count();
 
187
                                case 2: return seqs[rowIndex].percentage();
 
188
                                case 3: return seqs[rowIndex].contaminantHit();
 
189
                                        
 
190
                        }
 
191
                        return null;
 
192
                }
 
193
                
 
194
                public String getColumnName (int columnIndex) {
 
195
                        switch (columnIndex) {
 
196
                                case 0: return "Sequence";
 
197
                                case 1: return "Count";
 
198
                                case 2: return "Percentage";
 
199
                                case 3: return "Possible Source";
 
200
                        }
 
201
                        return null;
 
202
                }
 
203
                
 
204
                public Class<?> getColumnClass (int columnIndex) {
 
205
                        switch (columnIndex) {
 
206
                        case 0: return String.class;
 
207
                        case 1: return Integer.class;
 
208
                        case 2: return Double.class;
 
209
                        case 3: return String.class;
 
210
                }
 
211
                return null;
 
212
                        
 
213
                }
 
214
        }
 
215
        
 
216
        private class OverrepresentedSeq implements Comparable<OverrepresentedSeq>{
 
217
                
 
218
                private String seq;
 
219
                private int count;
 
220
                private double percentage;
 
221
                private ContaminantHit contaminantHit;
 
222
                
 
223
                public OverrepresentedSeq (String seq, int count, double percentage) {
 
224
                        this.seq = seq;
 
225
                        this.count = count;
 
226
                        this.percentage = percentage;
 
227
                        this.contaminantHit = ContaminentFinder.findContaminantHit(seq);
 
228
                }
 
229
                
 
230
                public String seq () {
 
231
                        return seq;
 
232
                }
 
233
                
 
234
                public int count () {
 
235
                        return count;
 
236
                }
 
237
                
 
238
                public double percentage () {
 
239
                        return percentage;
 
240
                }
 
241
                
 
242
                public String contaminantHit () {
 
243
                        if (contaminantHit == null) {
 
244
                                return "No Hit";
 
245
                        }
 
246
                        else {
 
247
                                return contaminantHit.toString();
 
248
                        }
 
249
                }
 
250
 
 
251
                public int compareTo(OverrepresentedSeq o) {
 
252
                        return o.count-count;
 
253
                }
 
254
        }
 
255
 
 
256
        public boolean raisesError() {
 
257
                if (!calculated) getOverrepresentedSeqs();
 
258
                if (overrepresntedSeqs.length>0) {
 
259
                        if (overrepresntedSeqs[0].percentage > 1) {
 
260
                                return true;
 
261
                        }
 
262
                }
 
263
                return false;
 
264
        }
 
265
 
 
266
        public boolean raisesWarning() {
 
267
                if (!calculated) getOverrepresentedSeqs();
 
268
 
 
269
                if (overrepresntedSeqs.length > 0) return true;
 
270
                return false;
 
271
        }
 
272
 
 
273
        public void makeReport(HTMLReportArchive report) {
 
274
                if (!calculated) getOverrepresentedSeqs();
 
275
                ResultsTable table = new ResultsTable(overrepresntedSeqs);
 
276
                
 
277
                StringBuffer b = report.htmlDocument();
 
278
                StringBuffer d = report.dataDocument();
 
279
                
 
280
                if (overrepresntedSeqs.length == 0) {
 
281
                        b.append("<p>No overrepresented sequences</p>\n");
 
282
                }
 
283
                
 
284
                else {
 
285
                        b.append("<table>\n");
 
286
                        // Do the headers
 
287
                        b.append("<tr>\n");
 
288
                        d.append("#");
 
289
                        for (int c=0;c<table.getColumnCount();c++) {
 
290
                                b.append("<th>");
 
291
                                b.append(table.getColumnName(c));
 
292
                                d.append(table.getColumnName(c));
 
293
                                b.append("</th>\n");
 
294
                                if (c<table.getColumnCount()-1) {
 
295
                                        d.append("\t");
 
296
                                }
 
297
                        }
 
298
                        b.append("</tr>\n");
 
299
                        d.append("\n");
 
300
                        
 
301
                        // Do the rows
 
302
                        for (int r=0;r<table.getRowCount();r++) {
 
303
                                b.append("<tr>\n");
 
304
                                for (int c=0;c<table.getColumnCount();c++) {
 
305
                                        b.append("<td>");
 
306
                                        b.append(table.getValueAt(r, c));
 
307
                                        d.append(table.getValueAt(r, c));
 
308
                                        b.append("</td>\n");
 
309
                                        if (c<table.getColumnCount()-1) {
 
310
                                                d.append("\t");
 
311
                                        }
 
312
                                }
 
313
                                b.append("</tr>\n");
 
314
                                d.append("\n");
 
315
                        }
 
316
                        
 
317
                        b.append("</table>\n");
 
318
                }       
 
319
        }
 
320
 
 
321
}