2
* Copyright 2010-12 Simon Andrews
4
* This file is part of FastQC.
6
* FastQC is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 3 of the License, or
9
* (at your option) any later version.
11
* FastQC is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
16
* You should have received a copy of the GNU General Public License
17
* along with FastQC; if not, write to the Free Software
18
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
package uk.ac.babraham.FastQC.Modules;
22
import java.awt.Graphics;
23
import java.awt.image.BufferedImage;
24
import java.io.IOException;
25
import java.util.Arrays;
26
import java.util.HashMap;
27
import java.util.zip.ZipEntry;
28
import java.util.zip.ZipOutputStream;
30
import javax.imageio.ImageIO;
31
import javax.swing.JPanel;
33
import uk.ac.babraham.FastQC.Graphs.LineGraph;
34
import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
35
import uk.ac.babraham.FastQC.Sequence.Sequence;
36
import uk.ac.babraham.FastQC.Sequence.QualityEncoding.PhredEncoding;
38
public class PerSequenceQualityScores implements QCModule {
40
private HashMap<Integer, Long> averageScoreCounts = new HashMap<Integer, Long>();
41
private double [] qualityDistribution = null;
42
private int [] xCategories = new int[0];
43
private char lowestChar = 126;
44
private int maxCount = 0;
45
private int mostFrequentScore;
46
private boolean calculated = false;
48
public JPanel getResultsPanel() {
50
if (!calculated) calculateDistribution();
52
return new LineGraph(new double [][] {qualityDistribution}, 0d, maxCount, "Mean Sequence Quality (Phred Score)",new String [] {"Average Quality per read"}, xCategories, "Quality score distribution over all sequences");
56
private synchronized void calculateDistribution () {
58
PhredEncoding encoding = PhredEncoding.getFastQEncodingOffset(lowestChar);
60
Integer [] rawScores = averageScoreCounts.keySet().toArray(new Integer [0]);
61
Arrays.sort(rawScores);
63
// We'll run from the lowest to the highest
64
qualityDistribution = new double [1+(rawScores[rawScores.length-1]-rawScores[0])] ;
66
xCategories = new int[qualityDistribution.length];
68
for (int i=0;i<qualityDistribution.length;i++) {
69
xCategories[i] = (rawScores[0]+i)-encoding.offset();
70
if (averageScoreCounts.containsKey(rawScores[0]+i)) {
71
qualityDistribution[i] = averageScoreCounts.get(rawScores[0]+i);
76
for (int i=0;i<qualityDistribution.length;i++) {
77
if (qualityDistribution[i]>maxCount) {
78
maxCount = (int)qualityDistribution[i];
79
mostFrequentScore = xCategories[i];
86
public void processSequence(Sequence sequence) {
87
char [] seq = sequence.getQualityString().toCharArray();
88
int averageQuality = 0;
90
for (int i=0;i<seq.length;i++) {
91
if (seq[i] < lowestChar) {
94
averageQuality += seq[i];
98
averageQuality /= seq.length;
100
if (averageScoreCounts.containsKey(averageQuality)) {
101
long currentCount = averageScoreCounts.get(averageQuality);
103
averageScoreCounts.put(averageQuality, currentCount);
106
averageScoreCounts.put(averageQuality, 1L);
111
public void reset () {
112
averageScoreCounts.clear();
118
public String description() {
119
return "Shows the distribution of average quality scores for whole sequences";
122
public String name() {
123
return "Per sequence quality scores";
126
public boolean raisesError() {
127
if (!calculated) calculateDistribution();
129
if (mostFrequentScore <=20) return true;
134
public boolean raisesWarning() {
135
if (!calculated) calculateDistribution();
137
if (mostFrequentScore <=27) return true;
142
public void makeReport(HTMLReportArchive report) throws IOException {
143
if (!calculated) calculateDistribution();
145
ZipOutputStream zip = report.zipFile();
146
zip.putNextEntry(new ZipEntry(report.folderName()+"/Images/per_sequence_quality.png"));
148
BufferedImage b = new BufferedImage(800,600,BufferedImage.TYPE_INT_RGB);
149
Graphics g = b.getGraphics();
151
LineGraph lg = new LineGraph(new double [][] {qualityDistribution}, 0d, maxCount, "Mean Sequence Quality (Phred Score)", new String [] {"Average Quality per read"}, xCategories, "Quality score distribution over all sequences");
154
ImageIO.write((BufferedImage)(b),"PNG",zip);
156
StringBuffer sb = report.htmlDocument();
158
sb.append("<p><img class=\"indented\" src=\"Images/per_sequence_quality.png\" alt=\"Per Sequence quality graph\"></p>\n");
160
sb = report.dataDocument();
161
sb.append("#Quality\tCount\n");
162
for (int i=0;i<xCategories.length;i++) {
163
sb.append(xCategories[i]);
165
sb.append(qualityDistribution[i]);
171
public boolean ignoreFilteredSequences() {