/*
 * File:   main.c
 * Author: Eric Auer <auere@hochschule-trier.de>
 * Project: PIC PIC16F1459 controller and MCP3208 ADC used for Pulse Oximetry
 * License: modified zlib license with copyleft, see full terms below
 * Hint: Use "minicom -b 115200 -D /dev/ttyS0" to see output. Ctrl-A X exits.
 * 
 * Should compile at most picky (threshold -9) settings without any warnings.
 * 
 * Copyright (C) 2017 Eric Auer <auere@hochschule-trier.de>
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the author be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software in
 *    a product, all product owners must be given a copy of the source code.
 * 2. Altered source code versions must be plainly marked as such, and
 *    must not be misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 *    This notice has to be shipped with any hardware using this software.
 */

// automatically done by xc.h: #include <pic16f1459.h>

/* *** PIC16F1459 Configuration Bit Settings *** */

// CONFIG1 0xf9c4 (default: 0xffff)
// Note: the "***" markers presumably flag settings that differ from the chip
// default (each marked line is followed by a "default is ..." remark).
#pragma config FOSC = INTOSC    // *** Oscillator Selection Bits (INTOSC oscillator: I/O function on CLKIN pin)
// default is FOSC = "external clock, high frequency, connected at CLKIN pin"
#pragma config WDTE = OFF       // *** Watchdog Timer Enable (WDT disabled)
// default is "enable, reset if watchdog untouched for > 18 ms"
#pragma config PWRTE = ON       // *** Power-up Timer Enable (PWRT enabled)
// default is "do not wait for power up timings to stabilize"
#pragma config MCLRE = ON       // MCLR Pin Function Select (MCLR/VPP pin function is MCLR)
// MCLR can only be disabled by disabling low power programming as well
#pragma config CP = OFF         // Flash Program Memory Code Protection (Program memory code protection is disabled)
// CP would disable flash read (CP can always be reset when erasing all flash)
#pragma config BOREN = OFF      // *** Brown-out Reset Enable (Brown-out Reset disabled)
// default is "enable brown out reset" (can be problematic at low supply power)
#pragma config CLKOUTEN = OFF   // Clock Out Enable (CLKOUT function is disabled. I/O or oscillator function on the CLKOUT pin)
// obviously, sending the clock to some I/O pin blocks that pin for other uses
#pragma config IESO = ON        // Internal/External Switchover Mode (Internal/External Switchover Mode is enabled)
// (allows short wake-up on intosc without waiting for extosc to warm up first)
#pragma config FCMEN = ON       // *** Fail-Safe Clock Monitor Enable (Fail-Safe Clock Monitor is enabled)
// default is disabled - FCMEN switches to internal 31 kHz when ext clock fails

// CONFIG2 (PLL upscales only 8 or 16 MHz to 24/32 or 48 MHz)
#pragma config WRT = OFF        // Flash Memory Self-Write Protection (Write protection off)
#pragma config CPUDIV = NOCLKDIV // *** CPU System Clock Selection Bit: no CPU clock divide selected here (options: CLKDIV2, CLKDIV3, CLKDIV6 or NOCLKDIV; the original note "USB: CLKDIV6" presumably means a USB build would use CLKDIV6)
#pragma config USBLSCLK = 48MHz // USB Low Speed Clock Selection bit (System clock expects 48 MHz, FS/LS USB CLKENs divide-by is set to 8.) *** 24MHz or 48MHz
#pragma config PLLMULT = 3x     // PLL Multiplier Selection Bit (3x Output Frequency Selected) *** (3x or 4x)
#pragma config PLLEN = DISABLED  // *** PLL Enable Bit: PLL is disabled here (the original note "USB: ENABLED" presumably means a USB build would enable the 3x or 4x PLL) *** (PLL takes 2 msec to stabilize!)
#pragma config STVREN = ON      // Stack Overflow/Underflow Reset Enable (Stack Overflow or Underflow will cause a Reset)
#pragma config BORV = LO        // Brown-out Reset Voltage Selection (Brown-out Reset Voltage (Vbor), low trip point selected.)
#pragma config LPBOR = OFF      // Low-Power Brown Out Reset (Low-Power BOR is disabled)
#pragma config LVP = ON         // Low-Voltage Programming Enable (Low-voltage programming enabled)

// luckily, the LVP enable can only be disabled by high-voltage programming
// PLL enable in config words overrides the OSCCON register bits for that
// osc speeds settle in 1024 cycles (2nd) or circa 2 us (int) or 2 ms (pll)

// measured: 1.24 * 10 * 10000 delay units / second at INTOSC clock, so
// INTOSC clock defaults to energy-saving 0.5 MHz, 4 cycles / delay unit

// 4 MHz is not enough for at least 20 ksps, so at least 8 MHz clock needed,
// with clock/4 MIPS for most RISC instructions (a few take 2x or 3x the time).
// Only 4000000, 8000000 and 16000000 are handled by setup_hardware(); any
// other value stops the build there with an #error.
#define _XTAL_FREQ 16000000
// defining _XTAL_FREQ also enables the __delay_us and __delay_ms macros.

// *WARNING* PIC16F1459 is doing many things 8-bit style, so this code often
// uses char as loop counters & array index! Only okay for SMALL arrays!

// #pragma config statements should precede project file includes.
// Use project enums instead of #define for ON and OFF.

/* *** Include compiler specific, chip specific and general header files *** */

#include <xc.h>

#include <pic.h> // for _delay(unsigned long) and, with XTAL_FREQ, __delay_us

#include <stdlib.h> // abs(int), labs(long), ltoa(long, char*), itoa(int, char*)
#include <limits.h> // INT_MAX, INT_MIN, UINT_MAX, LONG_MAX, LONG_MIN, ...
// Note: Microchip PIC also offers "short long" which are 24 bit values

// possibly useful: stdio.h, stdlib.h, stddef.h ...

/* *** I/O port pin definitions *** */

// write LATxbits.LATxn or write PORTxbits.Rxn: both end up in the output latch,
// but a single-bit write via PORT is a read-modify-write that reads the PINS,
// so it can disturb other bits of the same port (see ADC_CLOCK warning below)
// read PORTxbits.Rxn (set port to tristate first to be able to see input)
// set TRIS bits to 1 for input, 0 for output (set output driver to tristate)
// write I/O pin: prefer LAT (latch); use PORT only where speed matters

// ADC_DISABLE is ~CS, change to LOW starts conversion, change to HIGH stops it
#define ADC_DISABLE LATCbits.LATC5
// ADC_CLOCK, read data after rising edge, chip updates data on falling edge
// *WARNING* Clock is written via PORT, not LAT, for maximum speed, but this
// can break bits by read-modify-write depending on TRIS and ANSEL settings!
#define ADC_CLOCK PORTCbits.RC3
// the following would be slower, because it needs bank switches PORT vs LAT:
// #define ADC_CLOCK LATCbits.LATC3
// ADC_DATA contains A/D bits from MCP3208 chip (always use PORT to read)
#define ADC_DATA PORTCbits.RC4

// LED to indicate problems, such as "too much ambient light" or "no finger"
#define ERROR_LED LATBbits.LATB4
// LED to indicate where bulk CPU time usage slows down sampling (OPTIONAL)
#define BUSY_LED LATAbits.LATA4

// LED to send RED light through the finger on the sensor
#define RED_LED LATCbits.LATC6
// LED to send INFRARED light through the finger to the sensor
#define IR_LED LATCbits.LATC7

/* *** Define lines for various settings, algorithm choice, debug, etc. *** */

// Define this to activate "analog" pulsating LED output of pulse AC data:
// output voltage moves in 24 out of 32 steps between 0 and 3 Volts, with 0
// for brightest LED and 3 for most dim LED (unity gain amp, LED to supply)
#define PULSE_LED 1

// Define this to get logs of ADC sample bursts around RED / IR LED toggles
// (enables compilation of adc_burst_test_run(), needs CURVE_SIZE >= 64)
#define DEBUG_TIMINGS 1

// Define this to get live AC curve data as text on the serial terminal
#define PULSE_TEXT 1

// Define this to get live AC curve bargraphs on the serial terminal
// (enables compilation of bargraph())
// #define PULSE_BARS 1

// Select how often analysis should be done for curve data and bargraph output:
// For example 3 for "every 3rd decimated sample". Adjust according to available
// CPU speed and UART output channel speed versus configured output verbosity!
#define INFO_MODULO 1

// Define for 14 tap FIR low-pass decimate filter by subsample factor X where
// X between 2 and 4 probably makes most sense
// (defining it at all enables compilation of fourteen_tap_fir())
#define DECIMATE14 2

// Define negative value to sample ambient only once per ring buffer round,
// use positive value to sample ambient with each cycle. For 8 MHz: 4, 5, -8
// For 16 MHz: 6, 8, -8
#define ASYM_SETTLE_SAMPLES_OFF -7

// Curve size should be circa 1 to 1.5 seconds to see a complete pulse shape
// and to always have the ability to calculate AC and DC min and max ranges.
// Each decimated curve sample takes 8 bytes and should be ca 1/60 second long.
// Minimum curve size is 64 because the buffer is also used for other things
// (checked by an #error when DEBUG_TIMINGS is defined).
// Maximum curve size is ca. 90 to avoid too much drift and due to limited RAM.
#define CURVE_SIZE 80

/* *** End of defines, static array variables follow *** */

// Low sampling rate buffers after 1:8 decimate, sampling rate around 60 Hz
// (decimate has to do a low-pass filter at 30 Hz to avoid alias artifacts)
// NOTE(review): "1:8" does not obviously match DECIMATE14 == 2 - confirm the
// effective decimation factor against the main loop.
// When DEBUG_TIMINGS is defined, adc_burst_test_run() overwrites the first 64
// items of all four curve buffers with raw A/D traces.
static unsigned int red_curve[CURVE_SIZE + 5]; // 5 extra to fold wraps
static unsigned int ir_curve[CURVE_SIZE + 5]; // 5 extra to fold wraps

// Note: Wrap-fold space would only be needed here for a 2nd low-pass round...
static unsigned int red_ac_curve[CURVE_SIZE + 5]; // 5 extra to fold wraps
static unsigned int ir_ac_curve[CURVE_SIZE + 5]; // 5 extra to fold wraps

// Used for printing integers: itoa, ltoa, utoa, ultoa, ... calls work with it
// (setup_hardware() passes it to utoa() for the self-test output)
static char textbuf[16];

// Note: Big arrays must be static because PIC stack-simulation is too small

// High sampling rate buffers: 16 item ring buffers, input for decimate filter
// Idea: Use oversampling, low-pass filter to 30 Hz, decimate to 60 samples/sec
// (fourteen_tap_fir() indexes such a buffer modulo 16)
static unsigned int red_quick[16 + 5]; // some extra items for wrap processing
static unsigned int ir_quick[16 + 5]; // as above

/* *** End of static variables, individual functions follow *** */

/** Send the given NUL-terminated string using the UART (no handshake!)
 *  Note: The buffer is only read, never modified. The call polls the UART
 *  flags with the transmit interrupt disabled and returns as soon as the
 *  final byte has been handed to the transmit register, so that byte may
 *  still be shifting out when the function returns.
 */
void uart_send(const char * buffer) {
    TXIE = 0; // polling only: keep the UART transmit IRQ off
    TXEN = 1; // switch the transmitter on (this also sets TXIF)
    // TRMT is the "transmit shift register empty" flag:
    // let any previous transmission drain completely first
    while (TRMT == 0) {
    }
    for (; *buffer != 0; buffer++) {
        // TXIF is the "transmit buffer register empty" flag:
        // wait until the UART can take one more byte
        while (TXIF == 0) {
        }
        TXREG = *buffer;
    }
    // deliberately no final wait for TRMT: the next call waits instead
}

#ifdef PULSE_BARS
/** Render value as a fixed-width ASCII bargraph in the caller's buffer.
 *
 *  The buffer must hold at least 16 chars: 15 visible characters plus
 *  the terminating nul. Values above 3*14 are clipped to 3*14. Each
 *  full group of 3 becomes one '#'; a remainder of 1 is drawn as '!'
 *  and a remainder of 2 as 'I'. A value of 0 is drawn as a single '.'.
 *  The rest is padded with spaces, so the string length is always 15.
 *
 *  Returns the buffer pointer that was passed in.
 */
const char * bargraph(char * buffer, unsigned int value) {
    unsigned char pos = 0;
    unsigned int rest = (value > (3*14u)) ? (3*14u) : value;

    if (rest == 0) {
        buffer[pos++] = '.'; // "nothing to show" marker
    }
    // repeated subtraction instead of division: cheap on an 8 bit CPU
    for (; rest >= 3u; rest -= 3) {
        buffer[pos++] = '#';
    }
    switch (rest) { // ASCII art interpolation of the last partial step
    case 1:
        buffer[pos++] = '!';
        break;
    case 2:
        buffer[pos++] = 'I';
        break;
    default:
        break;
    }
    for (; pos < 15u; pos++) { // pad to fixed length
        buffer[pos] = ' ';
    }
    buffer[15] = 0;
    return buffer;
}
#endif

/** Read one 12 bit sample from ADC chip MCP3208 as fast as possible, which
 *  means (20 * 6) + a few RISC instructions of 4 cycles each on PIC16F1459.
 *  The implementation also takes care to keep protocol timings jitter-free:
 *  every clock half-period executes the same "write clock, test data pin,
 *  maybe set bit" sequence, whether the bit is used or not.
 * 
 *  At 16 MHz CPU clock speed, the ADC clock is 2/3 MHz (max 2 MHz at 5 Volts,
 *  1 MHz at low voltage) which means circa 30 kilo samples per second A/D.
 *
 *  Returns the conversion result in the range 0 to 4095. On return ~CS is
 *  high again and LATC holds the bit pattern it had when the function was
 *  entered.
 */
unsigned int adc_read_once(void) {
    // *WARNING* Clock is written via PORT, not LAT, for maximum speed, but this
    // may break some bits: Read-modify-write reads INPUT while updating OUTPUT.
    // Remember the intended latch contents now, so they can be put back below.
    char backup_LATC = LATC;
    // would be even faster to ab-use the I2C / SPI hardware block for this,
    // but that would be a bit messy in the setup and pin assignment instead
    unsigned int value = 0;
    // Note: Even a byte sized loop counter is slow here: The PIC has only 1
    // "result" register, 2 pointer regs and a tiny 16 byte "zero page" of RAM.
    // Only "zero page" and current (128 byte) bank RAM access is fast. I/O
    // use banks, the "bulk" of RAM is blocks of 80 bytes in each bank, which
    // can easily be another bank than what was your "current" I/O bank before.
    // The free Microchip XC8 C compiler version is not very smart either...
    ADC_CLOCK = 0;
    // make sure that clock starts at LOW and abort old conversions, if any:
    ADC_DISABLE = 1; // abort old conversions
    ADC_DISABLE = 1; // (give chip some time)
    ADC_DISABLE = 0; // falling edge of ADC_DISABLE aka ~CS starts protocol
    // first 5 clocks could pass command: 1, diff flag, 3 bit channel addr
    // not sending anything yet - command data pin simply tied to HIGH ;-)
    // Each LOW and HIGH part of the clock has to be at least 1/4 us long
    
    // adc_low_clock: LOW half of one clock period. The ADC_DATA test only
    // sets the throw-away bit 8192, it exists to keep both halves equally long
#define adc_low_clock() ADC_CLOCK = 0; if (ADC_DATA) value |= 8192;
    // adc_clock: one full clock period, LOW half then HIGH half. Data is
    // sampled after the rising edge and sets bit x in value if it is HIGH
#define adc_clock(x) adc_low_clock() ADC_CLOCK = 1; if (ADC_DATA) value |= x;
    // adc_clock_ignore: one full clock period whose data bit is not wanted,
    // it lands in the throw-away bit 8192 to keep the timing identical
#define adc_clock_ignore() adc_clock(8192)
    
    adc_clock_ignore(); // chip receives "1" start bit
    adc_clock_ignore(); // chip receives "absolute" bit (not differential)
    adc_clock_ignore(); // chip reads high channel address bit
    adc_clock_ignore(); // chip reads middle channel address bit
    adc_clock_ignore(); // chip reads low channel address bit, sample gate opens
    // Sample and hold circuit: Gate opens at low to high clock edge
    adc_clock_ignore(); // sample and hold active during this clock cycle
    // Sample and hold gate closes at next low to high clock edge
    adc_clock_ignore(); // sample gate closes, chip sends leading 0 bit

    // last 12 clocks are for receiving 12 bits of data, MSB first
    // (additional clocks would receive bits back up to MSB, then 00s)
    // Note: data becomes valid 1/4 us after each clock high to low edge

    // bit tests and bit changes (even if by |= NUMBER) compile as fast
    // bcf (clear), bsf (set) and btfsc (test), so this unrolled loop is
    // only 4 RISC instructions for each bit fetched, as "value" is in the
    // compiled "stack" which is in the fast "in all banks" memory area :-)

    adc_clock(2048);
    adc_clock(1024);
    adc_clock(512);
    adc_clock(256);
    adc_clock(128);
    adc_clock(64);
    adc_clock(32);
    adc_clock(16);
    adc_clock(8);
    adc_clock(4);
    adc_clock(2);
    adc_clock(1);

    // raise ~CS again, so the circuits can recover for the next A/D call!
    // (without this, additional clocks would output more bits, for example)
    ADC_DISABLE = 1;
    // One extra clock cycle to make sure that the chip has time to reset...
    adc_clock_ignore();
    // restore LATC bit pattern in case "read-modify-write PORT" has damaged it.
    // Fix: this used to read "LATC = backup_LATC = LATC;", which overwrote the
    // backup with the current (possibly damaged) latch and restored nothing.
    // ~CS is HIGH on entry (left so by setup and by the previous call), so
    // the restored pattern keeps the chip deselected.
    LATC = backup_LATC;
    value &= ~8192; // remove bogus bit again before returning the A/D value
    return value;
}

/** Take 9 back-to-back adc_read_once() samples and combine them with a
 *  symmetric low-pass, returning a scaled 15 bit value instead of the
 *  raw 12 bit A/D reading.
 *
 *  Tap weights are [4 8 15 18 21 18 15 8 4] (sum 111), the weighted sum
 *  is multiplied by 18 and divided by 256: overall gain 111*18/256 =
 *  7.805, maximum output 31960.
 *  See Octave signal package freqz([4 8 15 18 21 18 15 8 4]*18/256) ...
 *
 *  Estimated number of adc_read() smoothed "samples" per second:
 *  2200 at 16 MHz CPU clock, 1100 at 8 MHz and 550 at 4 MHz.
 *
 *  Much less than 1/9th of the sampling rate available for adc_read_once(),
 *  which would have been above 3000 at 16 MHz clock and so on ...
 */
unsigned int adc_read(void) {
    unsigned int raw[9]; // the burst, raw[4] is the centre tap
    unsigned short long int acc; // 24 bit weighted sum
    unsigned int term; // 16 bit scratch for partial sums
    unsigned char n;

    for (n = 0; n < 9; n++) {
        raw[n] = adc_read_once();
    }

    // Weights are hand-picked for low bit weight while watching freqz() output
    // outer taps: 1x [0],[8] and 2x [1],[7], scaled by 4 below
    term = raw[0] + raw[8];
    term += (raw[1] + raw[7] + raw[1] + raw[7]);
    acc = term;
    acc <<= 2; // weights 4 and 8

    // [2],[6] and [3],[5] both enter with weight 16 first ...
    term = raw[2] + raw[6] + raw[3] + raw[5];
    acc += ((unsigned short long int)term) << 4;
    // ... then [2],[6] drop to 15 ...
    term = raw[2] + raw[6];
    acc -= term;
    // ... and [3],[5] rise to 18
    term = raw[3] + raw[5] + raw[3] + raw[5];
    acc += term;

    // centre tap: 5x and 16x added separately, 21x would not fit 16 bits
    term = raw[4];
    term += (term << 2); // factor 5
    acc += term;
    term = raw[4] << 4; // factor 16 (result max 64k)
    acc += term; // factor now 16+5 = 21

    // acc is at most 111 * 4095 now, so multiplication by 18 is safe
    acc += (acc << 3); // times 9
    acc += acc; // times 2

    // divide by 256 without shifting: read the upper two bytes of acc
    // overall gain: 111*18/256 = 7.805, max output 31960
    return *(unsigned int *)((char *)(&acc) + 1);
}

/** Fast 5-tap linear phase FIR low-pass filter with weights
 *  1 8 15 8 1 and gain 33/32, slightly above unity gain!
 *
 *  buffer must point at 5 consecutive samples; buffer[2] is the centre
 *  tap. The weighted sum (weights add up to 33) is rounded and divided
 *  by 32. The result saturates at INT_MAX.
 */
unsigned int five_tap_fir(const unsigned int * buffer) {
    unsigned short long int sum = buffer[2]; // 1
    sum += buffer[2]; // 2
    sum += buffer[1];
    sum += buffer[3];
    sum <<= 3; // 8 and 16
    sum -= buffer[2]; // 8 and 15
    sum += buffer[0];
    sum += buffer[4];
    sum += 16u; // prepare to round
    sum >>= 5; // divide by 32 (weights sum to 33, hence gain 33/32)
    // Clamp on the full 24 bit sum. Taking the low 16 bits first, as the code
    // did before, let results of 65536 and above wrap around to small values
    // instead of saturating.
    if (sum > (unsigned int)INT_MAX) {
        return INT_MAX;
    }
    return (unsigned int)sum; // fits: at most INT_MAX here
}

#ifdef DECIMATE14
/** Symmetric 14 tap FIR low-pass over a 16 item ring buffer.
 *
 *  Weights: [3 14 34 64 100 131 150  150 131 100 64 34 14 3], sum 992.
 *  The weighted sum is rounded and divided by 1024, so the gain is
 *  almost unity (992/1024 = 0.97). According to the original notes the
 *  filter attenuates frequencies above one third by at least 50 dB.
 *
 *  buffer is indexed modulo 16: tap [k] is buffer[(offs + k) & 15] for
 *  k = 0 to 13. Mirrored taps [k] and [13 - k] share a weight and are
 *  added first, then scaled by shift and add (no multiplication). The
 *  final division uses a shift by 2 plus a byte offset read (by 256).
 *
 *  NOTE(review): the original comment calls offs the "offset of most
 *  recently added buffer item", but the taps run upwards from offs -
 *  confirm against the caller which slot is actually passed in.
 */
unsigned int fourteen_tap_fir(const unsigned int * buffer, unsigned char offs) {
    unsigned long int total; // 32 bit weighted sum of all 14 taps
    unsigned short long int pair; // 24 bit scratch: one mirrored tap pair
    unsigned char up = offs; // walks taps [0] to [6]
    unsigned char down = offs; // walks taps [13] to [7]
    down += 13u;

    // taps [0] + [13]: weight 3 = 1 + 2
    pair = buffer[up & 15u];
    pair += buffer[down & 15u];
    total = pair;
    pair <<= 1;
    total += pair;

    // taps [1] + [12]: weight 14 = 2 * (8 - 1)
    up++;
    down--;
    pair = buffer[up & 15u];
    pair += buffer[down & 15u];
    pair <<= 3; // 8
    pair -= buffer[up & 15u]; // 8 to 7
    pair -= buffer[down & 15u]; // 8 to 7
    pair <<= 1; // 14
    total += pair;

    // taps [2] + [11]: weight 34 = 2 + 32
    up++;
    down--;
    pair = buffer[up & 15u];
    pair += buffer[down & 15u];
    pair <<= 1; // 2
    total += pair;
    pair <<= 4; // 32
    total += pair;

    // taps [3] + [10]: weight 64
    up++;
    down--;
    pair = buffer[up & 15u];
    pair += buffer[down & 15u];
    pair <<= 6;
    total += pair;

    // taps [4] + [9]: weight 100 = 4 * (24 + 1)
    up++;
    down--;
    pair = buffer[up & 15u];
    pair += buffer[down & 15u];
    pair <<= 3; // 8
    pair += (pair << 1); // 8 + 16 = 24
    pair += buffer[up & 15u]; // 25
    pair += buffer[down & 15u]; // 25
    pair <<= 2; // 100
    total += pair;

    // taps [5] + [8]: weight 131 = 1 + 2 + 128
    up++;
    down--;
    pair = buffer[up & 15u];
    pair += buffer[down & 15u];
    total += pair; // 1
    pair <<= 1;
    total += pair; // 2
    pair <<= 6;
    total += pair; // 128

    // taps [6] + [7]: weight 150 = 2 + 4 + 16 + 128
    up++;
    down--;
    pair = buffer[up & 15u];
    pair += buffer[down & 15u];
    pair <<= 1;
    total += pair; // 2
    pair <<= 1;
    total += pair; // 4
    pair <<= 2;
    total += pair; // 16
    pair <<= 3;
    total += pair; // 128

    total += 2; // prepare to round
    total >>= 2; // divide by 4
    // divide by 256: read the two bytes above the lowest byte of total
    return *(unsigned int *)((char *)(&total) + 1);
}
#endif

/** Select clock speed, setup serial port UART, configure I/O pins...
 * The following hardware is expected to be connected to the following pins:
 * 
 * C3 clock to MCP3208 A/D converter
 * C4 data from A/D converter (data *to* A/D converter: connect to +5V)
 * C5 ~CS to A/D converter
 * 
 * Note: A/D converter CH7 input is connected to unity gain OpAmp output
 * (LM324 will be okay for up to 3.5 Volt signals at 5 Volt supply voltage)
 * while OpAmp input is connected to 10 kOhm to ground and SDP 8406-3 photo
 * transistor (adjust resistor to use other sensor). Use another unity gain
 * OpAmp as low impedance Vref source. Vref defined by red LED and Si diode
 * in series to ground and 2.2 kOhm to 5 Volt is good enough: Should be in
 * the 2.0 to 3.0 Volt range for useful A/D conversion performance and gain.
 * 
 * A 2.2 nF capacitor in parallel to the 10 kOhm R forms a simple low-pass:
 * Any low-pass between sensor and CH7 should be weak (high cut-off frequency)
 * because the signal is actually multiplexed into dark, RED, infrared cycles!
 * 
 * The code uses 9x oversampling in adc_read itself, followed by an additional
 * low-pass decimate filter step: R-value calc should use 1/2 - 30 Hz bandwidth.
 * 
 * C6 transistor switch to enable bright RED LED to illuminate finger,
 * for example 660 nm 4.5 cd at 20 mA LED but with 560 Ohm resistor at 5 Volts.
 * C7 transistor switch to enable bright IR (infrared) LED to illuminate finger,
 * for example 940 nm 55 nm (60..320 mW/sr?) at 100 mA LED but with 680 Ohms,
 * to limit power consumption and avoid overmodulation of the sensor signal.
 * 
 * C2 is analog (0 to 4 Volt, 32 steps) output for "pulsating" LED, connected
 * to unity gain OpAmp. Output of OpAmp connects to 220 Ohms and LED to 5 Volts
 * (because LM324 output can only swing from 0 to (5-1.5) Volts, not rail/rail)
 * 
 * B4 "on" indicates "error" (for example "no finger at sensor"), connect LED
 * A4 "on" indicates "busy" (CPU busy with computations), optional LED
 * 
 * B7 connect to TTL-to-RS232 level shifter for serial output of measurements
 * B5 optionally connect to RS232-to-TTL level shifter (future control channel)
 * 
 * A3 ~MCLR (reset and programming mode enable input pin, inverted) Add pull-up!
 * C0 ICSP data (programming mode data, bidirectional)
 * C1 ICSP clock (programming mode clock, from programmer)
 * 
 * Note: A0, A1 and USB3V3 are reserved for USB. A4 and A5 are for clock stuff.
 * Each LED is switched on for 250 ms at boot as a quick functional check :-)
 * 
 * Additional hardware: Current limiting resistors for all LED as appropriate,
 * blocking capacitors for supply voltage near chips, Vref stabilizing caps and
 * safety resistors for all data to the PIC16F1459: ICSP clock, ICSP data, A/D
 * data, optional RS232 to PIC data to avoid hardware stress if software fails.
 */
void setup_hardware(void) {
    // One-time board bring-up, in this order: clear output latches, select
    // the internal oscillator speed, configure the UART, set pin directions,
    // enable the voltage reference, run the LED test, measure supply voltage
    // and temperature sensor with the internal A/D, then start the D/A
    // output for the "pulsating" LED. Progress is reported via uart_send().
    LATA = 0;
    LATB = 0;
    LATC = 0;

    OSCCONbits.SPLLEN = 0; // could activate PLL here, if not already set in config
    // OSCCON.SPLLMULT = 1; // 1 for 3x, 0 for 4x, if not set in config
    // Note: USB always needs 3x (needs 24 or 48 MHz clock)
    // Note: 4x 16 MHz clock not allowed, only 3x 8, 4x 8 and 3x 16

#if (_XTAL_FREQ == 4000000)
    OSCCONbits.IRCF = 13;
#elif (_XTAL_FREQ == 8000000)
    OSCCONbits.IRCF = 14; // 15/14 = 16/8 MHz*, 13..8 = 4/2/1/0.5/0.25/0.125 MHz
#elif (_XTAL_FREQ == 16000000)
    OSCCONbits.IRCF = 15;
#else
#error "Selected _XTAL_FREQ not implemented yet!"
#endif
    
    // special IRCF: 3/4 31.25/62.5 kHz, 1 for low-power 31 kHz slow int osc
    // Note: * 8 MHz and 16 MHz turn into 24, 32 or 48 MHz by enabling the PLL!
    // OSCCON.SCS = 3; // 0 use config fosc setting, 1 secondary, 2/3 int osc

    // OSCSTAT bits: PLLRDY (PLL ready), HF/LFIOFR (fast/slow int osc ready)
    // HFIOFS (int osc stable: 0 during start-up), SOSCR (2nd osc stable) ...

    SYNC = 0; // use async UART
    SPEN = 1; // enable UART output
    // TXEN = 1; // activate UART transmitter (done by uart_send instead)
    TX9 = 0; // no parity
    SCKP = 0; // no inverse output
    BRGH = 1; // use high baud rate
    BRG16 = 1; // use 16 bit baud rate counter
    // see DS41639A PIC16F1459 data sheet page 274 for popular values:
#if (_XTAL_FREQ == 4000000)
    SPBRG = 16; // 16 for 58820 baud (ca 57600), 51 for 19230 (ca 19200) @ 4 MHz
    // in 8 bit mode, only SPBRGL is used, SPBRGH of SPBRG is not used
#elif (_XTAL_FREQ == 8000000)
    SPBRG = 16; // 16: 117600 baud (115.2), 34: 57140 (57.6), 103: 19.23 @ 8 MHz
#elif (_XTAL_FREQ == 16000000)
    SPBRG = 34; // 34: 114.3 (115.2) kbps, 68: 58.0 (57.6), 207: 19.23 @ 16 MHz
    // Note: No maximum baud rate given, TTL vs RS232 converter can do 250 kbps
#else
#error "Serial port speed not defined yet for this clock speed!"
#endif
    
    // Setting a TRIS bit to 0 enables digital output
    // A0 and A1 reserved for USB, A2 does not exist (is USB 3V3 pin)
    TRISAbits.TRISA4 = 0; // CLKOUT (optional) *** status BUSY LED (optional)
    TRISAbits.TRISA5 = 1; // CLKIN (optional)
    // TRISAbits.TRISA3 = 1; // A3 is reserved for MCLR (~reset / ~program)
    ANSELA = 0; // disable all analog inputs, enable all digital PORT reads

    TRISBbits.TRISB4 = 0; // SPI and I2C (optional) *** status ERROR LED enable
    // Fix: RxD is an INPUT. It used to be configured as output (TRIS 0), which
    // makes the PIC drive LOW against the optional RS232-to-TTL level shifter.
    TRISBbits.TRISB5 = 1; // *** RxD from serial port partner
    TRISBbits.TRISB6 = 0; // SPI and I2C (optional)
    TRISBbits.TRISB7 = 0; // *** TxD to serial port partner
    ANSELB = 0; // disable all analog inputs, enable all digital PORT reads
   
    TRISCbits.TRISC0 = 1; // ICSP data (during programming)
    TRISCbits.TRISC1 = 1; // ICSP clock (during programming)

    TRISCbits.TRISC2 = 0; // DAC out (optional) *** LED enable
    TRISCbits.TRISC3 = 0; // *** ADC chip CLOCK
    ADC_CLOCK = 1;
    // Note: Use PORT, not LAT, to READ data
    TRISCbits.TRISC4 = 1; // *** ADC chip DATA (from chip)
    // Note: Control data to chip not used yet, tied to HIGH (always use CH7)
    TRISCbits.TRISC5 = 0; // *** ADC chip ~CS (HL starts A/D, LH stops A/D)
    ADC_DISABLE = 1;
    TRISCbits.TRISC6 = 0; // *** RED LED enable
    TRISCbits.TRISC7 = 0; // *** IR LED enable
    ANSELC = 0; // disable all analog inputs, enable all digital PORT reads

    // pinout: (20) - A0/* A1/* 3V3  C0/* C1/* C2 B4 B5 B6 (11)
    // pinout:  (1) + A5/* A4   A3/* C5   C4   C3 C6 C7 B7 (10)

    FVRCONbits.FVREN = 1; // activate reference voltage generator
    FVRCON |= 32; // TSEN: activate internal temperature sensor
    FVRCON |= 16; // TSRNG: use 4-diode temp, no problem at 5 Volts
    FVRCONbits.ADFVR = 2; // 0 none, 1/2/3 for 1/2/4 * 1.024 Volts A/D ref
    FVRCONbits.CDAFVR = 3; // 0 none, 1/2/3 for 1/2/4 * 1.024 Volts D/A ref
    // no need to wait for FVRCONbits.FVRRDY, Vref is always ready on PIC16F1459
    
    ERROR_LED = 0;
    BUSY_LED = 0;
    
    // Test all connected LED at boot
    __delay_ms(25); // give the serial port some time to get in sync
    uart_send("\r\nLED test sequence:\r\n\r\n");
    uart_send("ERROR LED\r\n");
    ERROR_LED = 1;
    __delay_ms(250);
    ERROR_LED = 0;
    uart_send("BUSY LED\r\n");
    BUSY_LED = 1;
    __delay_ms(250);
    BUSY_LED = 0;
    uart_send("INFRARED finger LED\r\n");
    IR_LED = 1;
    __delay_ms(250);
    IR_LED = 0;
    uart_send("RED finger LED\r\n");
    RED_LED = 1;
    __delay_ms(250);
    RED_LED = 0;
    uart_send("LED test done.\r\n\r\n");
    // End of LED test cycle

    uart_send("Temperature self-test:\r\n\r\n");
    // conversion time should be between 1 and 4 us
#if (_XTAL_FREQ == 4000000)
    ADCON1bits.ADCS = 1; // 1:8 for 2 us conversion time
#elif (_XTAL_FREQ == 8000000)
    ADCON1bits.ADCS = 5; // 1:16 for 2 us conversion time
#elif (_XTAL_FREQ == 16000000)
    ADCON1bits.ADCS = 2; // 1:32 for 2 us conversion time
#else
#error "A/D conversion speed not defined yet for this clock speed!"
#endif
    ADCON1bits.ADFM = 1; // right justified 10 bit value (6 MSB are zero)
    ADCON2bits.TRIGSEL = 0; // no auto A/D conversion triggers

    // Supply voltage: convert the 2.048 Volt reference with Vsupply as the
    // A/D reference, then solve for Vsupply
    ADCON1bits.ADPREF = 0; // supply voltage as reference to compare to Vref
    FVRCONbits.ADFVR = 2; // 0 none, 1/2/3 for 1/2/4 * 1.024 Volts A/D ref
    __delay_ms(1);
    // channels: 3..11 input 3..11, 29 temperature, 30 D/A, 31 Vref
    ADCON0bits.CHS = 31;
    ADCON0bits.GO = 0; // do not start conversion yet
    ADCON0bits.ADON = 1; // activate A/D converter (starts consuming power)
    __delay_ms(2); // give sample and hold time to sample (at least 8 us)
    ADCON0bits.GO = 1; // start conversion (freeze sample and hold now!)
    // wait for conversion to complete, but not for too long:
    // nothing is printed if GO never clears within INT_MAX polls
    __delay_ms(1);
    for (unsigned int i=0; i < INT_MAX; i++) {
        if (ADCON0bits.GO == 0) {// conversion done
            // if 1023 counts are Vsupply then ??? counts are 2.048 Volts
            unsigned short long int base_voltage = 1023ul * 2048ul;
            // ... or use 20951 / vref_raw = voltage in units of 0.1 Volts
            unsigned int vref_raw = ADRES; // A/D result in Vsupply / 1023 units
            if (vref_raw < 210) {
                vref_raw = 210; // use 9.98 Volts for "overflow"
            }
            // Note: Vsupply below 2.048 Volts would give vref_raw 1023,
            // but Vref itself would also go down before that can happen
            unsigned short long int vsupply = base_voltage / vref_raw;
            unsigned int volt16 = ((unsigned int *) &vsupply)[0]; // low 16 bit
            uart_send("Supply voltage: ");
            uart_send(utoa(&textbuf[0], volt16, 10));
            uart_send(" mV\r\n");
            break;
        }
    }
    
    // Temperature sensor: convert it against the 4.096 Volt internal
    // reference, so one A/D count is 4 mV
    ADCON1bits.ADPREF = 3; // 0 supply voltage, 2 external Vref, 3 internal Vref
    // channels: 3..11 input 3..11, 29 temperature, 30 D/A, 31 Vref
    ADCON0bits.CHS = 29; // temperature sensor
    FVRCONbits.ADFVR = 3; // 0 none, 1/2/3 for 1/2/4 * 1.024 Volts A/D ref
    __delay_ms(1);
    ADCON0bits.GO = 0; // do not start conversion yet
    ADCON0bits.ADON = 1; // activate A/D converter (starts consuming power)
    __delay_ms(2); // time for sample and hold to sample (> 8 us at 10 kOhms)
    ADCON0bits.GO = 1; // start conversion, stop sample and hold, enter "hold"
    // wait for conversion to complete, but not for too long
    unsigned int temp_raw = 0;
    __delay_ms(1);
    for (unsigned int i=0; i < INT_MAX; i++) {
        if (ADCON0bits.GO == 0) {// conversion done
            temp_raw = ADRES << 2; // A/D result (0 to 1023 * Vref / 1023)
            uart_send("Temperature sensor voltage: ");
            uart_send(utoa(&textbuf[0], temp_raw, 10));
            uart_send(" mV\r\n");
            break;
        }
    }
    // temp_raw is still 0 after a timeout and also after a genuine 0 reading:
    // complain and print whatever ADRES holds now
    if (temp_raw == 0) {
        uart_send("No temperature sensor reading?\r\n");
        temp_raw = ADRES << 2; // A/D result (0 to 1023 * Vref / 1023)
        uart_send("Temperature sensor voltage: ");
        uart_send(utoa(&textbuf[0], temp_raw, 10));
        uart_send(" mV\r\n");
    }
    uart_send("Temperature self-test done.\r\n\r\n");
    ADCON0bits.ADON = 0; // disable A/D converter again, not really used later
    
    DACCON0bits.DACOE1 = 0; // disable output 1
    DACCON0bits.DACOE2 = 0; // disable output 2
    FVRCONbits.CDAFVR = 3; // 0 none, 1/2/3 for 1/2/4 * 1.024 Volts D/A ref
    __delay_ms(1);
    DACCON0bits.DACEN = 1; // enable D/A converter (resistor ladder)
    DACCON0bits.DACOE1 = 1; // enable first output pin option: C2 (DAC LED)
    // DACCON0bits.DACOE2 = 1; // enable second output pin option: C3
    DACCON0bits.D1PSS = 2; // 0 supply voltage, 1 external Vref, 2 internal
    // output can also be used for comparisons and as A/D conversion input
    // possible comparisons: input pins to each other or Vref or D/A output

    // three ramps down (31 to 1) and up (0 to 30), 20 ms per D/A step
    uart_send("LED pulsation test:\r\n");
    for (unsigned char cycle=3; cycle != 0; cycle--) {
        for (unsigned char i=31; i != 0; i--) {
            DACCON1 = i;
            __delay_ms(20);
        }
        for (unsigned char i=0; i < 31; i++) {
            DACCON1 = i;
            __delay_ms(20);
        }
    }
    uart_send("LED pulsation test done.\r\n\r\n");

    // set output to 4 Volts to disable "pulsating" LED
    DACCON1 = 31; // output level, between 0 and 31 * Vref / 31
    
    // Two additional pseudo D/A channels are available as PWM module, output
    // via pins C5 and C6 (in the case of PIC16F1459). Can do 5 to 10 bit duty
    // cycle resolution, depending on used PWM frequency (see pages 287-292)

    // NOT yet used: timers (8 and 16 bit), sync serial port (i2c or spi),
    // interrupts, low power sleep, 2 bit complimentary waveform generator CWG,
    // watchdog, self-flashing, basic USB module, 

#ifdef TIMEDEMO_ADC
    uart_send("Timing demo: 100 000 ADC multi-read adc_read() calls\r\n");
    // ca 3000 per second at 16 MHz without low-pass, 2200 with "fast" low-pass
    // ... which means for 2 channels with 1 "on-settle sample" per switch and
    // "only once-per-round ASYM_SETTLE_SAMPLES_OFF" circa 750 per second
    for (unsigned int i=0; i < 50000; i++) {
        (void)adc_read();
        (void)adc_read();
    }
    uart_send("End of timing demo.\r\n\r\n");
#endif
        
    uart_send("Hardware ready!\r\n\r\n");
}

#ifdef DEBUG_TIMINGS
    // Show timing of signal after LED switching, using top speed bursts
#if CURVE_SIZE < 64
#error "CURVE_SIZE must be at least 64 for this"
#endif
/** Do 5 rounds of quick burst A/D conversion series for
 * ambient - IR - ambient - red measurements, each followed
 * by printing the first 29 adc_read results for each phase.
 * This function helps to inspect signal settle the timings.
 * 
 * Note: This overwrites the first 64 items of each of the 4
 * main curve buffers to store traces of A/D conversion values!
 */
void adc_burst_test_run(void) {
    uart_send("A/D conversion burst test:\r\n");
    for (unsigned int i = 0; i < 768; i++) { // 3 rounds of 256 samples each
        if ((i & 192) == 0) {
            if ((i & 255) == 0) {
                IR_LED = 0;
                RED_LED = 1;
                for (unsigned char j=0; j<64; j++) {
                    (void)adc_read();
                }
            }
            IR_LED = 0;
            RED_LED = 0;
            ir_ac_curve[i & 63] = adc_read();
        } else if ((i & 192) == 64) {
            IR_LED = 1;
            RED_LED = 0;
            ir_curve[i & 63] = adc_read();
        } else if ((i & 192) == 128) {
            IR_LED = 0;
            RED_LED = 0;
            red_ac_curve[i & 63] = adc_read();
        } else if ((i & 192) == 192) {
            IR_LED = 0;
            RED_LED = 1;
            red_curve[i & 63] = adc_read();
        }
        if ((i & 255) == 255) { // print (starts of) logged ADC traces
            IR_LED = 0;
            RED_LED = 0;
            uart_send("\r\nDark:\t");
            for (unsigned char j=0; j<29; j++) { // max 59
                uart_send(utoa(&textbuf[0], ir_ac_curve[j], 10));
                if (j == 9 || j == 19) { // ((j % 10) == 9) {
                    uart_send("\r\n\t");
                } else {
                    uart_send(" ");
                }
            }
            uart_send("\r\nIR:\t");
            for (unsigned char j=0; j<29; j++) { // max 59
                uart_send(utoa(&textbuf[0], ir_curve[j], 10));
                if (j == 9 || j == 19) { // ((j % 10) == 9) {
                    uart_send("\r\n\t");
                } else {
                    uart_send(" ");
                }
            }
            uart_send("\r\nDark:\t");
            for (unsigned char j=0; j<29; j++) { // max 59
                uart_send(utoa(&textbuf[0], red_ac_curve[j], 10));
                if (j == 9 || j == 19) { // ((j % 10) == 9) {
                    uart_send("\r\n\t");
                } else {
                    uart_send(" ");
                }
            }
            uart_send("\r\nRed:\t");
            for (unsigned char j=0; j<29; j++) { // max 59
                uart_send(utoa(&textbuf[0], red_curve[j], 10));
                if (j == 9 || j == 19) { // ((j % 10) == 9) {
                    uart_send("\r\n\t");
                } else {
                    uart_send(" ");
                }
            }
            uart_send("\r\n");
        } // print burst logs
    }
    
    unsigned int filler = 100;
    for (unsigned char j=0; j < (CURVE_SIZE+5); j++) {
        red_curve[j] = filler;
        ir_curve[j] = filler;
        red_ac_curve[j] = filler;
        ir_ac_curve[j] = filler;
        filler += 100;
    }
    
    uart_send("\r\nA/D conversion burst test done.\r\n\r\n");
    IR_LED = 0;
    RED_LED = 0;
}
#endif

/* *** Main Pulsoximetry program: Setup Microcontroller, do sample loop *** */

#ifdef PULSE_BARS
/** Scale the sum of two neighbouring AC curve samples to a bar length.
 * Computes round(32 * average / peak), clamped to the range 0 to 31, using
 * only 16 bit unsigned arithmetic.
 *
 * pair_sum: sum of two AC curve samples, each of them at most peak
 * peak: largest sample of that AC curve, expected range 1 to 2047
 * returns: bar length between 0 and 31
 */
static unsigned int pulse_bar_length(unsigned int pair_sum, unsigned int peak) {
    pair_sum <<= 4; // (sum of two) * 16 = average * 32, at most 65504
    if (pair_sum >= (31u * peak)) {
        // Quotient would be 31 or more. Clamping here keeps the caller's
        // "31 - length" from wrapping around below zero.
        return 31u;
    }
    // pair_sum is below 31 * peak <= 63457 here, so adding the rounding
    // term (at most 1023) cannot overflow a 16 bit unsigned int
    pair_sum += (peak >> 1); // rounding
    return pair_sum / peak;
}
#endif

/** Called at Microcontroller boot. Never left. Returning from main would
 * simply cause a reboot of the Microcontroller: You can check out any
 * time you want, but you can never leave...
 *
 * After setup_hardware() (and the optional DEBUG_TIMINGS burst test), the
 * endless main loop does the following in each pass:
 *  - read the A/D converter with both LEDs off (ambient light), with the
 *    red LED on and with the IR LED on, subtracting the ambient value
 *  - store both results in the 16 item ring buffers red_quick / ir_quick
 *  - decimate through fourteen_tap_fir() into red_curve / ir_curve
 *  - for every INFO_MODULO-th decimated sample (once the buffers have been
 *    filled for the first time): low-pass both curves with five_tap_fir(),
 *    remove the minimum, find the peak and produce the outputs selected by
 *    PULSE_TEXT, PULSE_BARS and PULSE_LED
 *  - once per buffer round (sample == 0): print the R-value and the
 *    underlying signal levels via UART
 */
void main(void) {
    
    setup_hardware();
    // srand(0x1234 ^ adc_read());

    unsigned char iteration = 0; // buffer round counter, 0 until first wrap
    unsigned char sample = 0; // next red_curve / ir_curve slot to be updated
    unsigned char sample_is_fresh = 0;
    
    // Range statistics of the raw samples of the current buffer round.
    // Only the ambient light (base) range is printed below; the red and
    // IR ranges are tracked but not read anywhere in this function.
    unsigned int minbase = INT_MAX;
    unsigned int maxbase = 0;
    unsigned int minred = INT_MAX;
    unsigned int maxred = 0;
    unsigned int minir = INT_MAX;
    unsigned int maxir = 0;
    
    ERROR_LED = 0;
    BUSY_LED = 0;
    
#ifdef DEBUG_TIMINGS
    adc_burst_test_run();
#endif
    
    unsigned int base = adc_read(); // initial value is not actually used
    minbase = base;
    maxbase = base;
            
    // Note: only low bits of fastsample are actually used (ring buffer)
    for (unsigned char fastsample = 0; ; fastsample++) { // main loop

        // Start by reading ADC for dark, red and IR light situations
        // Subtract dark (ambient light) data to get more accurate values
        RED_LED = 0;
        IR_LED = 0;
#if (ASYM_SETTLE_SAMPLES_OFF < 0)
        if (sample == 0) { // once per round would be enough but...
            // we do not filter by fastsample, so we get N per round
            // where N is the decimate factor. No real problem here.
            for (unsigned char j=0; j < -(ASYM_SETTLE_SAMPLES_OFF); j++) {
                base = adc_read(); // discard some samples so LED & signal can settle
            }
            base = adc_read(); // measure ambient light, only once in a while
            minbase = base;
            maxbase = base;
            if (base > (INT_MAX / 8)) { // too much ambient light, bad!
                ERROR_LED = 1;
            }
        }
#else
        for (unsigned char j=0; j < ASYM_SETTLE_SAMPLES_OFF; j++) {
            base = adc_read(); // discard samples so LED and signal can settle
        }
        base = adc_read(); // measure ambient light for RED case
        if (base > (INT_MAX / 8)) { // too much ambient light, bad!
            ERROR_LED = 1;
        }
        // Two independent checks: right after a reset of the statistics,
        // the same sample can be both the new minimum and the new maximum
        if (base < minbase) {
            minbase = base;
        }
        if (base > maxbase) {
            maxbase = base;
        }
#endif
        
        RED_LED = 1;
        IR_LED = 0;
        // Skip 1 sample after switching any LED on, so signal can settle
        (void)adc_read();
        unsigned int red = adc_read() - base; // measure RED light situation
        RED_LED = 0; // switch off as soon as possible
        IR_LED = 0;

        // if ambient changes quickly, "negative" values could have happened:
        // the unsigned subtraction then wraps around to a value > INT_MAX
        if (red > INT_MAX) {
            red = 0;
            // ERROR_LED = 1;
        } else if ((red + base) > 28672u) { // too close to saturation
            ERROR_LED = 1;
        }
        if (red < minred) {
            minred = red;
        }
        if (red > maxred) {
            maxred = red;
        }

        RED_LED = 0;
        IR_LED = 1;
        // Skip 1 sample after switching from RED to IR, so signal can settle
        (void)adc_read();
        unsigned int ir = adc_read() - base; // measure IR light situation
        RED_LED = 0;
        IR_LED = 0; // switch off as soon as possible

        // if ambient changes quickly, "negative" values could have happened:
        if (ir > INT_MAX) {
            ir = 0;
            // ERROR_LED = 1;
        } else if ((ir + base) > 28672u) { // too close to saturation
            ERROR_LED = 1;
        }
        if (ir < minir) {
            minir = ir;
        }
        if (ir > maxir) {
            maxir = ir;
        }
        RED_LED = 0;
        IR_LED = 0;

#ifdef SIMULATOR
        ir = (unsigned int)rand();
        red = (unsigned int)rand();
#endif
        
        // Store most recent high sampling rate data in a small ring buffer:
        red_quick[fastsample & 15u] = red;
        ir_quick[fastsample & 15u] = ir;
        
#if (DECIMATE14 > 4) || (DECIMATE14 < 2)
#error "Only DECIMATE14 values 2, 3 and 4 supported yet"
#endif
        sample_is_fresh = 0;
        // do Red and IR filter at slightly different time steps for less jitter
#if (DECIMATE14 > 2)
        if ((fastsample & 3u) == 0) {
            red_curve[sample] = fourteen_tap_fir(&red_quick[0], fastsample & 15u);
        } else if ((fastsample & 3u) == 1) {
            ir_curve[sample] = fourteen_tap_fir(&ir_quick[0], fastsample & 15u);
        }
        if ((fastsample & 3u) == (DECIMATE14-1)) { // 2 or 3
            sample_is_fresh = 1;
            sample++;
        }
#else // DECIMATE14 == 2
        if ((fastsample & 1u) == 0) {
            red_curve[sample] = fourteen_tap_fir(&red_quick[0], fastsample & 15u);
        } else if ((fastsample & 1u) == 1) {
            ir_curve[sample] = fourteen_tap_fir(&ir_quick[0], fastsample & 15u);            
            sample_is_fresh = 1;
            sample++;
        }
#endif
        
        // Avoid doing anything fancy more often than the decimated sample rate
        if (sample_is_fresh == 0)
            continue;

        // make first 4 samples also available after CURVE_SIZE
        // trying to read samples at "0 .. 3 & 4 samples before" would wrap
        if (sample < 5) { // did sample 0, 1, 2 or 3 just get updated?
            sample--; // back to just updated sample
            unsigned char wrapped_sample = sample;
            // 0 .. 3 mirrored as CURVE_SIZE +0 .. +3
            wrapped_sample += CURVE_SIZE;
            // allows "if (sample-4) < 0 then just add CURVE_SIZE and use that"
            red_curve[wrapped_sample] = red_curve[sample];
            ir_curve[wrapped_sample] = ir_curve[sample];
            sample++; // forward again to next to be updated sample
        }
        
        // the low sampling rate data buffers are used as ring buffers, too
        if (sample >= CURVE_SIZE) {
            sample = 0;
            ERROR_LED = 0; // keep "error" state for at most one buffer "round"
            iteration++;
            if (iteration == 100) { // iteration is 0 only right after boot
                iteration = 1; // keep counter in 2 digit range
            }
        }

        // Also avoid analysis in the 0th iteration: Buffers are not full yet!
        if (iteration == 0)
            continue;

        /* *** Next: analysis of the curve buffers to compute the R-value *** */

        // *IDEA* use INCREMENTAL min / AC / max computations reduce CPU load

        // Linear approx instead of minimum can be nice, but takes too much CPU:
        // slope = (n * sum(xy) - sum(x) * sum(y)) / (n * sum(xx) - (sum(x))^2)
        // Y axis = mean(y) - slope * mean(x) ... or alternatively ...
        // Y axis = (n*sum(xx)*sum(y)-sum(x)*sum(xy))/(n*sum(xx)-(sum(x))^2)
        
#if (INFO_MODULO == 1)
        {
#elif (INFO_MODULO == 2)
        if ((sample & 1) == 0) {
#elif (INFO_MODULO == 4)
        if ((sample & 3) == 0) {
#else
        // goal of INFO_MODULO is to limit consumed UART bandwidth and CPU time
#error "INFO_MODULO has to be a small power of two"
#endif
            BUSY_LED = 1;
            
            // Note that each time that info is shown about ONE sample, a full
            // set of computations takes place using the WHOLE buffer context!

            // Instead of using minimum and difference, the more classic
            // but slower way would be to use average and abs(difference)
            // Real AC is slower: it needs mean(signal) & abs(ac) computations.

            // Compute two "AC" curves; actually "signal - min(signal)" curves
            // and the peak amplitude of each of the curves for R-value checks.
            // Low-pass BEFORE min/max check to not have to adjust to LP gain.

            // Note: 4 lowest samples mirrored at CURVE_SIZE to avoid wrap
            for (unsigned char i = 0; i < CURVE_SIZE; i++) {
                red_ac_curve[i] = five_tap_fir(&red_curve[i]);
                ir_ac_curve[i] = five_tap_fir(&ir_curve[i]);
            }
            // minred & minir of raw samples != red_min & ir_min of curves!
            unsigned int red_min = INT_MAX; // do not confuse with minred!
            unsigned int ir_min = INT_MAX; // do not confuse with minir!
            // Measure DC offsets
            for (unsigned char i = 0; i < CURVE_SIZE; i++) {
                if (red_ac_curve[i] < red_min) {
                    red_min = red_ac_curve[i];
                }
                if (ir_ac_curve[i] < ir_min) {
                    ir_min = ir_ac_curve[i];
                }
            }
            unsigned int red_ac_max = 0;
            unsigned int ir_ac_max = 0;
            // Remove DC offsets, measure AC "amplitudes"
            for (unsigned char i = 0; i < CURVE_SIZE; i++) {
                red_ac_curve[i] -= red_min; // remove DC offset
                if (red_ac_curve[i] > red_ac_max) {
                    red_ac_max = red_ac_curve[i];
                }
                ir_ac_curve[i] -= ir_min; // remove DC offset
                if (ir_ac_curve[i] > ir_ac_max) {
                    ir_ac_max = ir_ac_curve[i];
                }
            }

            // most recent sample is at "sample-1"
            unsigned char freshest = sample;
            if (sample == 0) {
                freshest = CURVE_SIZE;
            }
            freshest--;
            // most recent sample after low-pass is at "sample-5"
            unsigned char freshest_ac = sample;
            freshest_ac -= 5u;
            if (freshest_ac > CURVE_SIZE) { // in other words, if negative...
                freshest_ac += CURVE_SIZE;
            }
            BUSY_LED = 0;
            
#ifdef PULSE_TEXT
            // Raw curve output as semicolon separated data:
            // Rac, IRac, Rraw, IRraw, ...
            if (red_min < 28672 && ir_min < 28672) {
                uart_send(utoa(&textbuf[0], red_ac_curve[freshest_ac], 10));
                uart_send(";");
                uart_send(utoa(&textbuf[0], ir_ac_curve[freshest_ac], 10));
#if 1
                uart_send(";");
                uart_send(utoa(&textbuf[0], red_curve[freshest], 10));
                uart_send(";");
                uart_send(utoa(&textbuf[0], ir_curve[freshest], 10));
#endif
                // uart_send(";");
                // uart_send(utoa(&textbuf[0], freshest, 10));
                // uart_send(";");
                // uart_send(utoa(&textbuf[0], freshest_ac, 10));
                uart_send("\r\n");
            }
#endif
            
#ifdef PULSE_BARS
            if (red_min >= 28672 || ir_min >= 28672) {
                uart_send("(idle)\r"); // no \n // no finger detected
            } else if (red_ac_max < 128 || ir_ac_max < 128) {
                uart_send("(weak)\r"); // no \n // no pulse detected
            } else if (red_ac_max < 2048 && ir_ac_max < 2048) {
                // all AC curve items are below 2048 each
                // index of the sample before the freshest one, wrapping
                // around in the ring buffer instead of reading item "-1"
                unsigned char previous = freshest;
                if (previous == 0) {
                    previous = CURVE_SIZE;
                }
                previous--;
                unsigned int bar = red_ac_curve[freshest];
                bar += red_ac_curve[previous]; // sum of two samples
                // average * 32 / max, rounded, clamped to range 0 to 31
                bar = pulse_bar_length(bar, red_ac_max);
                bar = 31u - bar; // more light on sensor means less blood!
                bargraph(&textbuf[0], bar); // 15 chars, space padded
                unsigned char rev1 = 14;
                for (unsigned char rev2 = 0; rev2 < 7; rev2++) {
                    // swap [0]..[6] with [14]..[8], keep [7] and [15]
                    unsigned char rchar = textbuf[rev1];
                    textbuf[rev1] = textbuf[rev2]; // flip bargraph
                    textbuf[rev2] = rchar;
                    rev1--;
                } // textbuf[15] left as-is (nul char)
                uart_send(&textbuf[0]); // e.g. "          #####"
                bar = ir_ac_curve[freshest] + ir_ac_curve[previous];
                bar = pulse_bar_length(bar, ir_ac_max);
                bar = 31u - bar; // more light on sensor means less blood!
                uart_send(bargraph(&textbuf[0], bar)); // e.g. "#####          "
                uart_send("\r"); // no \n
            } else { // AC amplitude suspiciously high
                uart_send("*NOISE*\r"); // no \n // too much AC, probably drift
            }
#endif
            
#ifdef PULSE_LED
            if ((red_min >= 28672u) || (ir_min >= 28672u)) {
                ERROR_LED = 1;
                DACCON1 = 31; // no finger detected: no light (4.1 = 5 - 0.9 V)
            } else if ((red_ac_max < 128u) || (ir_ac_max < 128u)) {
                ERROR_LED = 1;
                // 23/31 of 4.096 Volts = 3.0 Volts = 5 - 2 Volts = dim light
                DACCON1 = 23; // no pulse detected: show dim light
            } else if ((red_ac_max < 2048u) && (ir_ac_max < 2048u)) {
                ERROR_LED = 0;
                // show the sum of both AC curves on the same pulsating LED:
                unsigned int pulse = red_ac_curve[freshest];
                pulse += ir_ac_curve[freshest];
                // average over 2 AC samples for more smooth output
                if (freshest == 0) {
                    pulse += red_ac_curve[CURVE_SIZE - 1u];
                    pulse += ir_ac_curve[CURVE_SIZE - 1u];
                } else {
                    pulse += red_ac_curve[freshest - 1u];
                    pulse += ir_ac_curve[freshest - 1u];
                }
                // Higher values = more sensor light = less blood in finger!
                // Scale: curves at 0 give DAC output 0, at max give DAC at 24
                // pulse is max 8188 here, so pulse * 6 still fits in 16 bits
                pulse += pulse; // (sum of four) * 2 = (average of four) * 8
                pulse += pulse + pulse; // sum * (3 * 2) = average * 24
                pulse += (red_ac_max + ir_ac_max + 1) >> 2; // rounding
                pulse /= (red_ac_max + ir_ac_max + 1) >> 1; // "... / avg max"
                DACCON1 = (unsigned char)pulse; // low byte
            } else { // signal too noisy
                ERROR_LED = 1;
                DACCON1 = 31; // no light
            }
#endif
                        
            // calculate (red ac / red avg) / (ir ac / ir avg), fixed point,
            // by doing "(scale factor * red ac * ir avg) / (ir ac * red avg)"
            // We have space for a scale factor: AC is much smaller than DC :-)

            // Note: This uses "avg is circa min + (ac max / 2)" approximation
            unsigned long int ratio = red_ac_max;
            unsigned int dc_average;
            dc_average = ir_min + (ir_ac_max >> 1);
            ratio *= dc_average; // no overflow (15 bit values)
            unsigned long int divider = ir_ac_max;
            dc_average = red_min + (red_ac_max >> 1);
            divider *= dc_average; // no overflow (see above)
            divider++; // avoid zero
            if ((ratio < (LONG_MAX/100)) && ((ir_ac_max + red_ac_max) > 255)) {
                // AC vs DC range expectation tests passed: finger found :-)
                ratio <<= 2; // * 4
                unsigned long int ratio_mul = ratio << 3; // * 4 * 8 = * 32
                ratio = ratio + ratio_mul + ratio_mul + ratio_mul; // scale
                // ratio now is 4+(3*32) = 100 times the original value
            } else {
                ratio = 0; // no finger: too much DC, too little or too much AC
            }
            ratio /= divider; // one of very few big divisions needed here :-)

            // Expected R value: 0.4 to 3.4 for SpO2 100% to 0%
            // Healthy R values: 0.4 to 0.8 for SpO2 100% to 90%

            // Note: ratio is given in units of 0.01 (scale factor 100 above)

            if (sample == 0) {
                // BUSY_LED = 1;
                
                // Output results via serial port: R-value ratio with decimals
                // red_min (int), red_ac_max (int, small value expected)
                // ir_min (int), ir_ac_max (int, small value expected)

                // Note: all values expected to be positive, so we can use
                // the smaller ultoa and utoa instead of ltoa and itoa :-)
                uart_send("R-value: ");
                if ((ratio > 32000) || (ratio == 0)) {
                    ERROR_LED = 1;
                    uart_send("----");
                } else {
                    ERROR_LED = 0;
                    // ratio is at most 32000 here: cut to 16 bit is lossless
                    unsigned int small_r = (unsigned int)ratio;
                    // small_r is given in units of 0.01 for R-value
                    uart_send(utoa(&textbuf[0], small_r / 100, 10));
                    uart_send(".");
                    unsigned int digits = small_r % 100;
                    if (digits < 10) {
                        uart_send("0"); // pad to two decimals
                    }
                    uart_send(utoa(&textbuf[0], digits, 10));
                }

                // always show underlying RED and INFRARED AC and DC levels:
                uart_send("   Red "); // mean(...) vs min(...)
                uart_send(utoa(&textbuf[0], red_min, 10));
                uart_send(" + 0.."); // max(abs(...)) vs max(...)
                uart_send(utoa(&textbuf[0], red_ac_max, 10));
                uart_send("   IR "); // mean(...) vs min(...)
                uart_send(utoa(&textbuf[0], ir_min, 10));
                uart_send(" + 0.."); // max(abs(...)) vs max(...)
                uart_send(utoa(&textbuf[0], ir_ac_max, 10));
                // Too much ambient light is bad - disturbs measurements!
                uart_send("   ["); // Amount of ambient light hitting the sensor
                uart_send(utoa(&textbuf[0], minbase, 10));
                uart_send("..");
                uart_send(utoa(&textbuf[0], maxbase, 10));
                uart_send("] #");
                uart_send(utoa(&textbuf[0], iteration, 10)); // 1-2 round nr. digits
                uart_send("\r\n");
                BUSY_LED = 0;

            } // if right moment to print analysis results

        } // if right moment to do analysis
        
        if (sample == 0) {
            // Reset range statistics
            minred = INT_MAX;
            maxred = 0;
            minir = INT_MAX;
            maxir = 0;

#if (ASYM_SETTLE_SAMPLES_OFF >= 0)
            minbase = INT_MAX;
            maxbase = 0;
#else
            // (if base is only sampled once per round, never reset base range)
#endif
        }

    } // main loop
        
    // return;
}
