/* ===========================================================
 * JFreeChart : a free chart library for the Java(tm) platform
 * ===========================================================
 *
 * (C) Copyright 2000-2014, by Object Refinery Limited and Contributors.
 *
 * Project Info:  http://www.jfree.org/jfreechart/index.html
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
 * USA.
 *
 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates.
 * Other names may be trademarks of their respective owners.]
 *
 * ---------------
 * Statistics.java
 * ---------------
 * (C) Copyright 2000-2014, by Matthew Wright and Contributors.
 *
 * Original Author:  Matthew Wright;
 * Contributor(s):   David Gilbert (for Object Refinery Limited);
 *
 * Changes (from 08-Nov-2001)
 * --------------------------
 * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
 *               Moved from JFreeChart to package com.jrefinery.data.* in
 *               JCommon class library (DG);
 * 24-Jun-2002 : Removed unnecessary local variable (DG);
 * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
 * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
 * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
 * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0
 *               release (DG);
 * 02-Jul-2013 : Use ParamChecks (DG);
 *
 */

package org.jfree.data.statistics;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.jfree.chart.util.ParamChecks;

/**
 * A utility class that provides some common statistical functions.  All
 * methods are static; the class is declared {@code abstract} so that it
 * cannot be instantiated directly.
 */
public abstract class Statistics {

    /**
     * Returns the mean of an array of numbers.  This is equivalent to calling
     * {@code calculateMean(values, true)}.
     *
     * @param values  the values ({@code null} not permitted).
     *
     * @return The mean ({@link Double#NaN} if the array is empty or contains
     *         a {@code null} or {@code Double.NaN} item).
     */
    public static double calculateMean(Number[] values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of an array of numbers.
     *
     * @param values  the values ({@code null} not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     {@code null} and {@code Double.NaN} values are included
     *     in the calculation (if either is present in the array, the result is
     *     {@link Double#NaN}).  When {@code false}, such items are skipped.
     *
     * @return The mean ({@link Double#NaN} if no items contribute to the
     *         calculation, for example when the array is empty).
     *
     * @since 1.0.3
     */
    public static double calculateMean(Number[] values,
            boolean includeNullAndNaN) {

        ParamChecks.nullNotPermitted(values, "values");
        double sum = 0.0;
        int count = 0;
        for (int i = 0; i < values.length; i++) {
            // treat nulls the same as NaNs
            double current = (values[i] != null)
                    ? values[i].doubleValue() : Double.NaN;
            if (Double.isNaN(current)) {
                if (includeNullAndNaN) {
                    // a single NaN makes the whole mean NaN, so stop here
                    return Double.NaN;
                }
                continue;
            }
            sum += current;
            count++;
        }
        // 0.0 / 0 evaluates to NaN when nothing was counted
        return sum / count;
    }

    /**
     * Returns the mean of a collection of {@code Number} objects.  This is
     * equivalent to calling {@code calculateMean(values, true)}.
     *
     * @param values  the values ({@code null} not permitted).
     *
     * @return The mean ({@link Double#NaN} if the collection is empty or
     *         contains a {@code null} or {@code Double.NaN} item).
     */
    public static double calculateMean(Collection values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of a collection of {@code Number} objects.  Items
     * that are not instances of {@code Number} are ignored.
     *
     * @param values  the values ({@code null} not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     {@code null} and {@code Double.NaN} values are included
     *     in the calculation (if either is present in the collection, the
     *     result is {@link Double#NaN}).  When {@code false}, such items are
     *     skipped.
     *
     * @return The mean ({@link Double#NaN} if no items contribute to the
     *         calculation, for example when the collection is empty).
     *
     * @since 1.0.3
     */
    public static double calculateMean(Collection values,
            boolean includeNullAndNaN) {

        ParamChecks.nullNotPermitted(values, "values");
        int count = 0;
        double total = 0.0;
        Iterator iterator = values.iterator();
        while (iterator.hasNext()) {
            Object object = iterator.next();
            if (object == null) {
                if (includeNullAndNaN) {
                    return Double.NaN;
                }
                continue;
            }
            if (!(object instanceof Number)) {
                // silently skip anything that is not a number
                continue;
            }
            double value = ((Number) object).doubleValue();
            if (Double.isNaN(value)) {
                if (includeNullAndNaN) {
                    return Double.NaN;
                }
                continue;
            }
            total += value;
            count++;
        }
        // 0.0 / 0 evaluates to NaN when nothing was counted
        return total / count;
    }

    /**
     * Calculates the median for a list of values ({@code Number} objects).
     * The list of values will be copied, and the copy sorted, before
     * calculating the median.  To avoid this step (if your list of values
     * is already sorted), use the {@link #calculateMedian(List, boolean)}
     * method.
     *
     * @param values  the values ({@code null} permitted).
     *
     * @return The median ({@link Double#NaN} if {@code values} is
     *         {@code null} or empty).
     */
    public static double calculateMedian(List values) {
        return calculateMedian(values, true);
    }

    /**
     * Calculates the median for a list of values ({@code Number} objects).
     * If {@code copyAndSort} is {@code false}, the list is assumed
     * to be presorted in ascending order by value.  The list passed in is
     * never modified.
     *
     * @param values  the values ({@code null} permitted).
     * @param copyAndSort  a flag that controls whether the list of values is
     *                     copied and sorted.
     *
     * @return The median ({@link Double#NaN} if {@code values} is
     *         {@code null} or empty).
     */
    public static double calculateMedian(List values, boolean copyAndSort) {
        if (values == null) {
            return Double.NaN;
        }
        List sorted = values;
        if (copyAndSort) {
            // sort a copy so that the caller's list is left untouched
            sorted = new ArrayList(values);
            Collections.sort(sorted);
        }
        return medianOfSortedRange(sorted, 0, sorted.size());
    }

    /**
     * Calculates the median for a sublist within a list of values
     * ({@code Number} objects).  This is equivalent to calling
     * {@code calculateMedian(values, start, end, true)}.
     *
     * @param values  the values, in any order ({@code null} not permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     *
     * @return The median ({@link Double#NaN} if the index range is empty).
     */
    public static double calculateMedian(List values, int start, int end) {
        return calculateMedian(values, start, end, true);
    }

    /**
     * Calculates the median for a sublist within a list of values
     * ({@code Number} objects).  If {@code copyAndSort} is {@code true},
     * the items in the range {@code start} to {@code end} are copied to a
     * new list which is then sorted (the list passed in is not modified).
     * If {@code copyAndSort} is {@code false}, the items in that range are
     * assumed to be presorted in ascending order by value.
     *
     * @param values  the values ({@code null} not permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     * @param copyAndSort a flag that controls whether the sublist of values
     *                    is copied and sorted.
     *
     * @return The median ({@link Double#NaN} if the index range is empty,
     *         that is, if {@code end} is less than {@code start}).
     */
    public static double calculateMedian(List values, int start, int end,
            boolean copyAndSort) {

        int count = end - start + 1;
        if (count <= 0) {
            // an empty range has no median, whichever mode was requested
            return Double.NaN;
        }
        if (!copyAndSort) {
            return medianOfSortedRange(values, start, count);
        }
        List working = new ArrayList(count);
        for (int i = start; i <= end; i++) {
            working.add(values.get(i));
        }
        Collections.sort(working);
        return medianOfSortedRange(working, 0, count);
    }

    /**
     * Returns the median of {@code count} consecutive items, beginning at
     * index {@code start}, of a list of {@code Number} objects that is
     * already sorted in ascending order.
     *
     * @param values  the sorted values.
     * @param start  the index of the first item in the range.
     * @param count  the number of items in the range.
     *
     * @return The median ({@link Double#NaN} if {@code count} is zero or
     *         negative).
     */
    private static double medianOfSortedRange(List values, int start,
            int count) {
        if (count <= 0) {
            return Double.NaN;
        }
        int mid = start + count / 2;
        if (count % 2 == 1) {
            // odd number of items: the middle one is the median
            return ((Number) values.get(mid)).doubleValue();
        }
        // even number of items: average the two central values
        Number lower = (Number) values.get(mid - 1);
        Number upper = (Number) values.get(mid);
        return (lower.doubleValue() + upper.doubleValue()) / 2.0;
    }

    /**
     * Returns the standard deviation of a set of numbers.  The sum of the
     * squared deviations from the mean is divided by {@code n - 1}, so for
     * an array containing a single item the result is {@link Double#NaN}.
     * If the array contains a {@code null} or {@code Double.NaN} item, the
     * result is also {@link Double#NaN}.
     *
     * @param data  the data ({@code null} or zero length array not
     *     permitted).
     *
     * @return The standard deviation of a set of numbers.
     *
     * @throws IllegalArgumentException if {@code data} has zero length.
     */
    public static double getStdDev(Number[] data) {
        ParamChecks.nullNotPermitted(data, "data");
        if (data.length == 0) {
            throw new IllegalArgumentException("Zero length 'data' array.");
        }
        double avg = calculateMean(data);
        if (Double.isNaN(avg)) {
            // The mean is NaN when any item is null or NaN; every deviation
            // from it would be NaN too, so return now rather than
            // dereference a null item below.
            return Double.NaN;
        }
        double sum = 0.0;
        for (int i = 0; i < data.length; i++) {
            double diff = data[i].doubleValue() - avg;
            sum += diff * diff;
        }
        return Math.sqrt(sum / (data.length - 1));
    }

    /**
     * Fits a straight line to a set of (x, y) data, returning the slope and
     * intercept.
     *
     * @param xData  the x-data ({@code null} not permitted).
     * @param yData  the y-data ({@code null} not permitted).
     *
     * @return A double array with the intercept in [0] and the slope in [1].
     *
     * @throws IllegalArgumentException if the arrays have different lengths.
     */
    public static double[] getLinearFit(Number[] xData, Number[] yData) {

        ParamChecks.nullNotPermitted(xData, "xData");
        ParamChecks.nullNotPermitted(yData, "yData");
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                "Statistics.getLinearFit(): array lengths must be equal.");
        }

        double slope = getSlope(xData, yData);
        // the fitted line passes through (xbar, ybar): a = ybar - b * xbar
        double intercept = calculateMean(yData) - slope * calculateMean(xData);
        return new double[] {intercept, slope};
    }

    /**
     * Finds the slope of a regression line using least squares.
     *
     * @param xData  the x-values ({@code null} not permitted).
     * @param yData  the y-values ({@code null} not permitted).
     *
     * @return The slope.
     *
     * @throws IllegalArgumentException if the arrays have different lengths.
     */
    public static double getSlope(Number[] xData, Number[] yData) {
        ParamChecks.nullNotPermitted(xData, "xData");
        ParamChecks.nullNotPermitted(yData, "yData");
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }

        // ********* stat function for linear slope ********
        // y = a + bx
        // a = ybar - b * xbar
        //     sum(x * y) - (sum (x) * sum(y)) / n
        // b = ------------------------------------
        //     sum (x^2) - (sum(x)^2 / n
        // *************************************************

        // sum of x, x^2, x * y, y
        double sx = 0.0;
        double sxx = 0.0;
        double sxy = 0.0;
        double sy = 0.0;
        int n = xData.length;
        for (int i = 0; i < n; i++) {
            double x = xData[i].doubleValue();
            double y = yData[i].doubleValue();
            sx += x;
            sxx += x * x;
            sxy += y * x;
            sy += y;
        }
        return (sxy - (sx * sy) / n) / (sxx - (sx * sx) / n);
    }

    /**
     * Calculates the correlation between two datasets.  Both arrays should
     * contain the same number of items.  Null values are treated as zero.
     * If either dataset has no variation (for example, all items are equal,
     * or the arrays are empty) the computation divides by zero and the
     * result is not a meaningful correlation (typically
     * {@link Double#NaN}).
     * <p>
     * Information about the correlation calculation was obtained from:
     *
     * http://trochim.human.cornell.edu/kb/statcorr.htm
     *
     * @param data1  the first dataset ({@code null} not permitted).
     * @param data2  the second dataset ({@code null} not permitted).
     *
     * @return The correlation.
     *
     * @throws IllegalArgumentException if the arrays have different lengths.
     */
    public static double getCorrelation(Number[] data1, Number[] data2) {
        ParamChecks.nullNotPermitted(data1, "data1");
        ParamChecks.nullNotPermitted(data2, "data2");
        if (data1.length != data2.length) {
            throw new IllegalArgumentException(
                "'data1' and 'data2' arrays must have same length."
            );
        }
        int n = data1.length;
        double sumX = 0.0;
        double sumY = 0.0;
        double sumX2 = 0.0;
        double sumY2 = 0.0;
        double sumXY = 0.0;
        for (int i = 0; i < n; i++) {
            // null items contribute zero to every sum
            double x = (data1[i] != null) ? data1[i].doubleValue() : 0.0;
            double y = (data2[i] != null) ? data2[i].doubleValue() : 0.0;
            sumX += x;
            sumY += y;
            sumXY += x * y;
            sumX2 += x * x;
            sumY2 += y * y;
        }
        return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX)
                * (n * sumY2 - sumY * sumY), 0.5);
    }

    /**
     * Returns a data set for a moving average on the data set passed in.
     * Item {@code i} of the result pairs the x-value at index
     * {@code i + period} with the average of the {@code period} y-values
     * at indices {@code i} to {@code i + period - 1}.  A {@code period} of
     * zero yields {@link Double#NaN} for every average.
     *
     * @param xData  an array of the x data.
     * @param yData  an array of the y data.
     * @param period  the number of data points to average (must not be
     *     negative or greater than the number of data items).
     *
     * @return A double[][] with {@code xData.length - period} items in the
     *         first dimension, and two doubles for x and y in the second
     *         dimension.
     *
     * @throws IllegalArgumentException if the arrays have different lengths,
     *     or {@code period} is negative or exceeds the array length.
     */
    public static double[][] getMovingAverage(Number[] xData, Number[] yData,
            int period) {

        // check arguments...
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }

        if (period > xData.length) {
            throw new IllegalArgumentException(
                "Period can't be longer than dataset.");
        }

        if (period < 0) {
            // would otherwise fail later with an obscure index error
            throw new IllegalArgumentException("Period can't be negative.");
        }

        double[][] result = new double[xData.length - period][2];
        for (int i = 0; i < result.length; i++) {
            result[i][0] = xData[i + period].doubleValue();
            // holds the moving average sum
            double sum = 0.0;
            for (int j = 0; j < period; j++) {
                sum += yData[i + j].doubleValue();
            }
            result[i][1] = sum / period;
        }
        return result;
    }

}