Wednesday, 15 January 2014

Cleaning Noisy Time Series Data – Low Pass Filter C#

When working with time series data, like stock market prices, values can often contain a lot of noise, obscuring a real trend. One of the best ways to remove this noise is to run the data through a low pass filter.

Methods like simple moving averages and exponential moving averages are quick to implement and do a relatively good job. However, the disadvantage of these methods is that they only “look back” and do not take future values into account. This results in smoothed data which is out of phase with the original data-set, so peaks and troughs appear later than they really occurred.

A way to get around these issues is to implement a better filter, such as one based on a Fast Fourier Transform, or a Savitzky–Golay filter. However, these methods can be fairly complex and heavy to implement.

A simple method I use is shown below. I’m not sure if it’s a recognised technique, but I like to think of it as a one-dimensional radial basis function. It looks both back and forward at a value’s nearest neighbours and takes a weighted average, with weights that decay exponentially with distance. And, like all good vacuum cleaners, this method cleans right up to the edges, by adding inferred linear slopes to the beginning and end of the clean data-set.

The graph below shows a very noisy sine wave and its cleaner equivalent.


Here's the code - I hope you find it useful.

  using System;
  using System.Globalization;
  using System.IO;

  /// <summary>
  /// Demo program: builds a noisy sine wave, smooths it with a symmetric,
  /// exponentially weighted moving average, and writes both series to "data.csv".
  /// </summary>
  class Program
  {
      static void Main(string[] args)
      {
          int range = 5; // Number of data points each side to sample.
          double decay = 0.8; // [0.0 - 1.0] How slowly to decay from raw value.
          double[] noisy = NoisySine();
          double[] clean = CleanData(noisy, range, decay);
          WriteFile(noisy, clean);
      }

      /// <summary>
      /// Smooths <paramref name="noisy"/> with a centred weighted average whose
      /// weight at distance d from the sample is decay^d, then fills the first and
      /// last <paramref name="range"/> positions by linear extrapolation from the
      /// smoothed interior.
      /// </summary>
      /// <param name="noisy">Raw samples. The array is not modified.</param>
      /// <param name="range">
      /// Number of neighbours sampled on each side. If the input holds fewer than
      /// 2 * range + 1 samples, the range is reduced to the largest value that fits.
      /// </param>
      /// <param name="decay">Base of the exponential weights (see <see cref="Coefficients"/>).</param>
      /// <returns>A new array of the same length as <paramref name="noisy"/>.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="noisy"/> is null.</exception>
      /// <exception cref="ArgumentOutOfRangeException"><paramref name="range"/> is negative.</exception>
      private static double[] CleanData(double[] noisy, int range, double decay)
      {
          if (noisy == null)
          {
              throw new ArgumentNullException("noisy");
          }
          if (range < 0)
          {
              throw new ArgumentOutOfRangeException("range", "Range must not be negative.");
          }

          double[] clean = new double[noisy.Length];
          if (noisy.Length == 0)
          {
              return clean;
          }

          // A full window needs 2 * range + 1 samples. Shrink the window for short
          // input instead of indexing outside the array.
          range = Math.Min(range, (noisy.Length - 1) / 2);

          double[] coefficients = Coefficients(range, decay);

          // Calculate divisor value (sum of all weights in one window).
          double divisor = 0;
          for (int i = -range; i <= range; i++)
          {
              divisor += coefficients[Math.Abs(i)];
          }

          // Clean main data: every position that has a full window around it.
          for (int i = range; i < clean.Length - range; i++)
          {
              double temp = 0;
              for (int j = -range; j <= range; j++)
              {
                  temp += noisy[i + j] * coefficients[Math.Abs(j)];
              }
              clean[i] = temp / divisor;
          }

          // Calculate leading and trailing slopes from the smoothed interior.
          int leadRef = range;
          int trailRef = clean.Length - range - 1;

          // Only positions leadRef..trailRef have been smoothed so far, so the
          // slope estimate must not reach past them (the rest still holds 0).
          int slopeSpan = Math.Min(range, trailRef - leadRef);
          double leadSlope = 0;
          double trailSlope = 0;
          if (slopeSpan > 0)
          {
              double leadSum = 0;
              double trailSum = 0;
              for (int i = 1; i <= slopeSpan; i++)
              {
                  leadSum += (clean[leadRef] - clean[leadRef + i]) / i;
                  trailSum += (clean[trailRef] - clean[trailRef - i]) / i;
              }
              leadSlope = leadSum / slopeSpan;
              trailSlope = trailSum / slopeSpan;
          }

          // Clean edges: extend a straight line outwards from each reference point.
          for (int i = 1; i <= range; i++)
          {
              clean[leadRef - i] = clean[leadRef] + leadSlope * i;
              clean[trailRef + i] = clean[trailRef] + trailSlope * i;
          }
          return clean;
      }

      /// <summary>
      /// Precalculates the window weights: element i is decay raised to the power i,
      /// for i from 0 to <paramref name="range"/> inclusive.
      /// </summary>
      /// <param name="range">Largest distance a weight is needed for.</param>
      /// <param name="decay">Base of the exponential weights.</param>
      /// <returns>An array of range + 1 weights, indexed by distance.</returns>
      private static double[] Coefficients(int range, double decay)
      {
          double[] coefficients = new double[range + 1];
          for (int i = 0; i <= range; i++)
          {
              coefficients[i] = Math.Pow(decay, i);
          }
          return coefficients;
      }

      /// <summary>
      /// Writes one "noisy, clean" row per element of <paramref name="noisy"/> to
      /// "data.csv" in the current directory, each value with two decimal places.
      /// </summary>
      /// <param name="noisy">Raw samples; its length sets the number of rows.</param>
      /// <param name="clean">Smoothed samples, read at the same indices as <paramref name="noisy"/>.</param>
      private static void WriteFile(double[] noisy, double[] clean)
      {
          // The using block disposes (and therefore closes) the writer.
          using (TextWriter tw = new StreamWriter("data.csv"))
          {
              for (int i = 0; i < noisy.Length; i++)
              {
                  // Invariant culture keeps '.' as the decimal separator, so the
                  // comma stays an unambiguous field separator on every locale.
                  tw.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0:0.00}, {1:0.00}", noisy[i], clean[i]));
              }
          }
      }

      /// <summary>
      /// Creates 180 samples covering one full sine cycle, each offset by uniform
      /// random noise in [-0.5, 0.5).
      /// </summary>
      /// <returns>The noisy samples; values differ between runs.</returns>
      private static double[] NoisySine()
      {
          const int sampleCount = 180;
          const int halfPeriod = sampleCount / 2;

          double[] noisySine = new double[sampleCount];
          Random rnd = new Random();
          for (int i = 0; i < noisySine.Length; i++)
          {
              noisySine[i] = Math.Sin(Math.PI * i / halfPeriod) + rnd.NextDouble() - 0.5;
          }
          return noisySine;
      }
  }