// based on https://web.archive.org/web/20160418004149/http://freespace.virgin.net/hugo.elias/graphics/x_water.htm

#pragma GCC optimize("Ofast")

#include <FastLED.h>
#include "FastLED_additions.h"

#define WIDTH 32
#define HEIGHT 32
#define NUM_LEDS ((WIDTH) * (HEIGHT))
CRGB leds[NUM_LEDS];

// the water needs 2 arrays each slightly bigger than the screen
#define WATERWIDTH (WIDTH + 2)
#define WATERHEIGHT (HEIGHT + 2)
uint8_t water[2][WATERWIDTH * WATERHEIGHT];

void setup() {
  FastLED.addLeds<NEOPIXEL, 2>(leds, NUM_LEDS);
  uint8_t val = 4; // affects edge damping
  // paint the static perimeter of the water arrays
  for (int i = 0; i < WATERWIDTH; i++) {
    // (this assumes a square matrix)
    water[0][i] = val; water[1][i] = val;    // top
    int j =  i + WATERWIDTH * (WATERHEIGHT - 1);
    water[0][j] = val; water[1][j] = val;    // bottom
    j = i * WATERWIDTH;
    water[0][j] = val; water[1][j] = val;    // left
    j = i * WATERWIDTH + WATERWIDTH - 1;
    water[0][j] = val; water[1][j] = val;    // right
  }
}

// map X & Y coordinates onto a horizontal serpentine matrix layout
uint16_t XY(uint8_t x, uint8_t y) {
  if (y & 1)
    return (y + 1) * WIDTH - x - 1;
  return y * WIDTH + x;
}

void loop() {
  // swap the src/dest buffers on each frame
  static uint8_t buffer = 0;
  uint8_t * const bufA = &water[buffer][0];
  buffer = (buffer + 1) % 2;
  uint8_t * const bufB = &water[buffer][0];

  // add a moving stimulus
  static uint16_t phaseX = 0, phaseY = 0, phaseZ = 0;
  phaseX += beatsin16(9, 300, 1200);
  phaseY += beatsin16(7, 400, 1500);
  phaseZ += beatsin16(2, 500, 5000);
  uint16_t x = 256 + ((uint32_t(WIDTH -  2) * (sin16(phaseX) + 32768)) >> 8);
  uint16_t y = 256 + ((uint32_t(HEIGHT - 2) * (sin16(phaseY) + 32768)) >> 8);
  uint8_t z = 127 + ((sin16(phaseZ) + 32768) >> 9);
  wu_water(bufA, x, y, z);

  // add random drops
  if (random8() < 32) {
    x = 256 + random8() * (WIDTH - 2);
    y = 256 + random8() * (HEIGHT - 2);
    // but only place it if the spot is dark
    if (4 >= bufA[(x >> 8) + WATERWIDTH * (y >> 8)])
      wu_water(bufA, x, y, 64);
  }

  // animate the water
  process_water(bufA, bufB);

  // display the water effect on the LEDs
  uint8_t * input = bufB + WATERWIDTH - 1;
  static uint16_t pal_offset = 0;
  pal_offset += 96;
  for (uint8_t y = 0; y < HEIGHT; y++) {
    input += 2;
    uint16_t xy = XY(0, y);
    int16_t stride = XY(1, y) - xy;
    for (uint8_t x = 0; x < WIDTH; x++) {
      // uint16_t xy = XY(x, y);
      leds[xy] = ColorFromPaletteExtended(RainbowColors_p, pal_offset + (*input << 6), *input, LINEARBLEND);
      input++; xy += stride;
    }
  }
  FastLED.show();
}

void process_water(uint8_t * src, uint8_t * dst) {
  src += WATERWIDTH - 1;
  dst += WATERWIDTH - 1;
  for (uint8_t y = 1; y < WATERHEIGHT - 1; y++) {
    src += 2; dst += 2;
    for (uint8_t x = 1; x < WATERWIDTH - 1; x++) {
      uint16_t t = src[-1] * 64 + src[1] * 64 + src[-WATERWIDTH] * 64 + src[WATERWIDTH] * 64;
      uint16_t bigdst = *dst * 128;
      if (t <= bigdst)
        *dst = (bigdst - t) >> 8;
      else {
        t -= bigdst;
        if (t < 32768)
          *dst = t >> 7;
        else
          *dst = 255;
      }
      src++; dst++;
    }
  }
}

// draw a blob of 4 pixels with their relative brightnesses conveying sub-pixel positioning
void wu_water(uint8_t * const buf, uint16_t x, uint16_t y, uint8_t bright) {
  // extract the fractional parts and derive their inverses
  uint8_t xx = x & 0xff, yy = y & 0xff, ix = 255 - xx, iy = 255 - yy;
  // calculate the intensities for each affected pixel
#define WU_WEIGHT(a, b) ((uint8_t)(((a) * (b) + (a) + (b)) >> 8))
  uint8_t wu[4] = {WU_WEIGHT(ix, iy), WU_WEIGHT(xx, iy),
                   WU_WEIGHT(ix, yy), WU_WEIGHT(xx, yy)
                  };
#undef WU_WEIGHT
  // multiply the intensities by the colour, and saturating-add them to the pixels
  for (uint8_t i = 0; i < 4; i++) {
    uint8_t local_x = (x >> 8) + (i & 1);
    uint8_t local_y = (y >> 8) + ((i >> 1) & 1);
    if (local_x < 1 || local_x > WIDTH || local_y < 1 || local_y > HEIGHT)
      continue;
    uint16_t xy = WATERWIDTH * local_y + local_x;
    // if (xy >= WATERWIDTH * WATERHEIGHT) continue;
    uint16_t this_bright = bright * wu[i];
    buf[xy] = qadd8(buf[xy], this_bright >> 8);
  }
}