1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
//! Delay implementations

use core::marker;
use embedded_hal::delay::DelayNs;
use embedded_hal_v0::blocking::delay as delay_v0;

#[cfg(all(target_arch = "avr", avr_hal_asm_macro))]
use core::arch::asm;

/// A busy-loop delay implementation
///
/// `SPEED` is a marker type naming the CPU clock (for example
/// `clock::MHz16`).  It only selects which clock-specific delay
/// implementation applies and is never instantiated, so a `Delay` is a
/// zero-sized value.
///
/// # Example
/// ```rust
/// // Instead of arduino_hal below you may also use a different
/// // HAL based on avr_hal_generic like attiny_hal or atmega_hal
/// // depending on actual hardware. For example:
/// //
/// // use attiny_hal as hal;
///
/// use arduino_hal as hal;
/// use embedded_hal_v0::prelude::*;
///
/// let mut delay = hal::delay::Delay::<hal::clock::MHz16>::new();
///
/// // Wait 1 second
/// delay.delay_ms(1000);
/// ```
///
/// (The path `hal::delay::Delay` assumes the chosen HAL crate re-exports this
/// module as `delay`.)
///
/// # Warning
/// The delay is not accurate for values above 4095µs because of a loop whose
/// overhead is not accounted for.  This will be fixed in a future version.
#[derive(Debug, Clone, Copy)]
pub struct Delay<SPEED> {
    _speed: marker::PhantomData<SPEED>,
}

impl<SPEED> Delay<SPEED> {
    /// Creates a delay provider for the clock selected by `SPEED`.
    ///
    /// This is a `const fn`, so it can also be used to initialise constants
    /// and statics.
    pub const fn new() -> Delay<SPEED> {
        Delay {
            _speed: marker::PhantomData,
        }
    }
}

// Implemented by hand rather than derived: a derive would add a
// `SPEED: Default` bound, but the clock marker is never instantiated.
impl<SPEED> Default for Delay<SPEED> {
    /// Equivalent to [`Delay::new`].
    fn default() -> Self {
        Self::new()
    }
}

// based on https://github.com/arduino/ArduinoCore-avr/blob/master/cores/arduino/wiring.c

// `busy_loop(c)` spins for `c` iterations of a two-instruction countdown loop.
// Three variants are selected at compile time:
//   1. AVR with the `asm!` macro available (`avr_hal_asm_macro` cfg set),
//   2. AVR with only the legacy `llvm_asm!` macro,
//   3. any other target, where calling it panics.
//
// The clock-specific `delay_us` implementations in this file budget 4 CPU
// cycles per iteration.
cfg_if::cfg_if! {
    if #[cfg(all(target_arch = "avr", avr_hal_asm_macro))] {
        // `sbiw` subtracts one from the 16-bit counter and `brne` jumps back
        // to local label `1` until the result is zero.  The counter is
        // decremented before it is tested, so a count of 0 does not return
        // immediately; callers make sure they never pass 0.
        //
        // `unused_assignments` is allowed because the value the `inout`
        // operand writes back into `c` is never read afterwards.
        #[allow(unused_assignments)]
        fn busy_loop(mut c: u16) {
            // SAFETY: the assembly only decrements the register pair bound to
            // `c` (declared `inout`) and branches on the result; it contains
            // no loads, stores or stack manipulation.
            unsafe {
                asm!(
                    "1:",
                    "sbiw {c}, 1",
                    "brne 1b",
                    c = inout(reg_iw) c,
                );
            }
        }
    } else if #[cfg(target_arch = "avr")] {
        // Same countdown loop expressed with the legacy `llvm_asm!` syntax:
        // `c` is output operand 0 and the input constraint "0" ties the input
        // to that same operand.  "volatile" keeps the loop from being removed.
        #[allow(unused_assignments)]
        fn busy_loop(mut c: u16) {
            // SAFETY: as above, only the register pair holding `c` is modified
            // and no memory is accessed.
            unsafe {
                llvm_asm!("1: sbiw $0,1\n\tbrne 1b"
                     : "=w"(c)
                     : "0"(c)
                     :
                     : "volatile"
                 );
            }
        }
    } else {
        // Non-AVR builds get a stub so the crate still compiles; calling it
        // panics via `unimplemented!`.
        fn busy_loop(_c: u16) {
            unimplemented!("Implementation is only available for avr targets!")
        }
    }
}

// Clock-Specific Delay Implementations ----------------------------------- {{{
// Busy-wait `delay_us` calibrated for a 24 MHz CPU clock.
//
// Converts the requested microseconds into `busy_loop` iterations (4 cycles
// each, i.e. 6 per microsecond) and removes a fixed number of iterations to
// compensate for call and setup overhead.  The per-statement cycle counts in
// the comments are carried over from the implementation this code is based on
// and are not re-verified here.
//
// NOTE: `us *= 6` is plain `u16` arithmetic, so requests above 10922 µs
// (65535 / 6) overflow.  The chunked `u32` implementation in this file stays
// far below that limit.
impl delay_v0::DelayUs<u16> for Delay<crate::clock::MHz24> {
    fn delay_us(&mut self, mut us: u16) {
        // for the 24 MHz clock for the adventurous ones, trying to overclock

        // zero delay fix: without this, `us - 5` below would underflow
        if us == 0 {
            return;
        } // = 3 cycles, (4 when true)

        // the following loop takes 1/6 of a microsecond (4 cycles)
        // per iteration, so execute it six times for each microsecond of
        // delay requested.
        us *= 6; // x6 us, = 7 cycles

        // account for the time taken in the preceding commands.
        // we just burned 22 (24) cycles above, remove 5, (5*4=20)
        // us is at least 6 so we can subtract 5
        us -= 5; //=2 cycles

        busy_loop(us);
    }
}

// Busy-wait `delay_us` calibrated for a 20 MHz CPU clock.
//
// Converts the requested microseconds into `busy_loop` iterations (4 cycles
// each, i.e. 5 per microsecond) and removes a fixed number of iterations to
// compensate for call and setup overhead.  The per-statement cycle counts in
// the comments are carried over from the implementation this code is based on
// and are not re-verified here.
//
// NOTE: `(us << 2) + us` is plain `u16` arithmetic, so requests above
// 13107 µs (65535 / 5) overflow.  The chunked `u32` implementation in this
// file stays far below that limit.
impl delay_v0::DelayUs<u16> for Delay<crate::clock::MHz20> {
    fn delay_us(&mut self, mut us: u16) {
        // for the 20 MHz clock on rare Arduino boards

        // for a one-microsecond delay, simply return.  the overhead
        // of the function call takes 18 (20) cycles, which is 1us
        //
        // the four no-ops below pad the fixed overhead, presumably so that
        // it adds up to the whole microsecond (20 cycles) quoted above

        // SAFETY: `nop` has no operands and no effect on registers or memory.
        #[cfg(all(target_arch = "avr", avr_hal_asm_macro))]
        unsafe {
            asm!("nop", "nop", "nop", "nop");
        }

        // SAFETY: same four `nop`s, spelled for the legacy `llvm_asm!` macro.
        #[cfg(all(target_arch = "avr", not(avr_hal_asm_macro)))]
        unsafe {
            llvm_asm!("nop\nnop\nnop\nnop" :::: "volatile");
        }

        if us <= 1 {
            return;
        } // = 3 cycles, (4 when true)

        // the following loop takes 1/5 of a microsecond (4 cycles)
        // per iteration, so execute it five times for each microsecond of
        // delay requested.
        us = (us << 2) + us; // x5 us, = 7 cycles

        // account for the time taken in the preceding commands.
        // we just burned 26 (28) cycles above, remove 7, (7*4=28)
        // us is at least 10 so we can subtract 7
        us -= 7; // 2 cycles

        busy_loop(us);
    }
}

// Busy-wait `delay_us` calibrated for a 16 MHz CPU clock.
//
// Converts the requested microseconds into `busy_loop` iterations (4 cycles
// each, i.e. 4 per microsecond) and removes a fixed number of iterations to
// compensate for call and setup overhead.  The per-statement cycle counts in
// the comments are carried over from the implementation this code is based on
// and are not re-verified here.
//
// NOTE: `us <<= 2` silently shifts out the high bits for requests above
// 16383 µs (65535 / 4), which shortens the delay.  The chunked `u32`
// implementation in this file stays far below that limit.
impl delay_v0::DelayUs<u16> for Delay<crate::clock::MHz16> {
    fn delay_us(&mut self, mut us: u16) {
        // for the 16 MHz clock on most Arduino boards

        // for a one-microsecond delay, simply return.  the overhead
        // of the function call takes 14 (16) cycles, which is 1us
        // (this also covers us == 0, so `us - 5` below cannot underflow)
        if us <= 1 {
            return;
        } // = 3 cycles, (4 when true)

        // the following loop takes 1/4 of a microsecond (4 cycles)
        // per iteration, so execute it four times for each microsecond of
        // delay requested.
        us <<= 2; // x4 us, = 4 cycles

        // account for the time taken in the preceding commands.
        // we just burned 19 (21) cycles above, remove 5, (5*4=20)
        // us is at least 8 so we can subtract 5
        us -= 5; // = 2 cycles,

        busy_loop(us);
    }
}

// Busy-wait `delay_us` calibrated for a 12 MHz CPU clock.
//
// Converts the requested microseconds into `busy_loop` iterations (4 cycles
// each, i.e. 3 per microsecond) and removes a fixed number of iterations to
// compensate for call and setup overhead.  The per-statement cycle counts in
// the comments are carried over from the implementation this code is based on
// and are not re-verified here.
//
// NOTE: `(us << 1) + us` is plain `u16` arithmetic, so requests above
// 21845 µs (65535 / 3) overflow.  The chunked `u32` implementation in this
// file stays far below that limit.
impl delay_v0::DelayUs<u16> for Delay<crate::clock::MHz12> {
    fn delay_us(&mut self, mut us: u16) {
        // for the 12 MHz clock if somebody is working with USB

        // for a 1 microsecond delay, simply return.  the overhead
        // of the function call takes 14 (16) cycles, which is 1.5us
        // (this also covers us == 0, so `us - 5` below cannot underflow)
        if us <= 1 {
            return;
        } // = 3 cycles, (4 when true)

        // the following loop takes 1/3 of a microsecond (4 cycles)
        // per iteration, so execute it three times for each microsecond of
        // delay requested.
        us = (us << 1) + us; // x3 us, = 5 cycles

        // account for the time taken in the preceding commands.
        // we just burned 20 (22) cycles above, remove 5, (5*4=20)
        // us is at least 6 so we can subtract 5
        us -= 5; //2 cycles

        busy_loop(us);
    }
}

// Busy-wait `delay_us` for a 10 MHz CPU clock.
//
// Converts the requested microseconds into `busy_loop` iterations (4 cycles
// = 0.4 µs each, i.e. 2.5 per microsecond, rounded down).  Unlike the other
// clock-specific implementations in this file, no iterations are subtracted
// afterwards to compensate for call and setup overhead, so delays run
// slightly long.
//
// NOTE: `(us << 2) + us` is plain `u16` arithmetic, so requests above
// 13107 µs (65535 / 5) overflow.  The chunked `u32` implementation in this
// file stays far below that limit.
impl delay_v0::DelayUs<u16> for Delay<crate::clock::MHz10> {
    fn delay_us(&mut self, mut us: u16) {
        // for the 10 MHz clock

        // for a 1 microsecond delay, simply return.  the overhead
        // of the function call is quoted as 14 (16) cycles, which at 10 MHz
        // is 1.4 (1.6) us
        if us <= 1 {
            return;
        } // = 3 cycles, (4 when true)

        // 4 cycles per busy_loop iteration = 0.4 us per busy loop, so 2.5 times to get 1 us
        // computed as (us * 5) / 2 with shifts; us is at least 2 here, so the
        // result is at least 5 and never 0
        us = ((us << 2) + us) >> 1; // x2.5

        busy_loop(us);
    }
}

// Busy-wait `delay_us` calibrated for an 8 MHz CPU clock.
//
// Converts the requested microseconds into `busy_loop` iterations (4 cycles
// each, i.e. 2 per microsecond) and removes a fixed number of iterations to
// compensate for call and setup overhead.  The per-statement cycle counts in
// the comments are carried over from the implementation this code is based on
// and are not re-verified here.
//
// NOTE: `us <<= 1` silently shifts out the top bit for requests above
// 32767 µs, which shortens the delay.  The chunked `u32` implementation in
// this file stays far below that limit.
impl delay_v0::DelayUs<u16> for Delay<crate::clock::MHz8> {
    fn delay_us(&mut self, mut us: u16) {
        // for the 8 MHz internal clock

        // for a 1 and 2 microsecond delay, simply return.  the overhead
        // of the function call takes 14 (16) cycles, which is 2us
        // (this also covers us == 0, so `us - 4` below cannot underflow)
        if us <= 2 {
            return;
        } // = 3 cycles, (4 when true)

        // the following loop takes 1/2 of a microsecond (4 cycles)
        // per iteration, so execute it twice for each microsecond of
        // delay requested.
        us <<= 1; //x2 us, = 2 cycles

        // account for the time taken in the preceding commands.
        // we just burned 17 (19) cycles above, remove 4, (4*4=16)
        // us is at least 6 so we can subtract 4
        us -= 4; // = 2 cycles

        busy_loop(us);
    }
}

// Busy-wait `delay_us` calibrated for a 1 MHz CPU clock.
//
// At 1 MHz one `busy_loop` iteration (4 cycles) already takes 4 µs, so the
// request is reduced by a fixed overhead estimate and then divided by 4.
// Requests of 25 µs or less return immediately, relying on the call overhead
// alone.  The per-statement cycle counts in the comments are carried over
// from the implementation this code is based on and are not re-verified here.
//
// This variant only subtracts and shifts right, so it cannot overflow.
impl delay_v0::DelayUs<u16> for Delay<crate::clock::MHz1> {
    fn delay_us(&mut self, mut us: u16) {
        // for the 1 MHz internal clock (default settings for common Atmega microcontrollers)

        // the overhead of the function calls is 14 (16) cycles
        //
        // NOTE(review): this first check is subsumed by the `us <= 25` check
        // that follows; both are kept, presumably to mirror the cycle
        // accounting of the code this is based on.
        if us <= 16 {
            return;
        } //= 3 cycles, (4 when true)
        if us <= 25 {
            return;
        } //= 3 cycles, (4 when true), (us must exceed 25 so that subtracting 22 leaves at least 4)

        // compensate for the time taken by the preceding and next commands (about 22 cycles)
        us -= 22; // = 2 cycles

        // the following loop takes 4 microseconds (4 cycles)
        // per iteration, so execute it us/4 times
        // us is at least 4, divided by 4 gives us 1 (no zero delay bug)
        us >>= 2; // us div 4, = 4 cycles

        busy_loop(us);
    }
}

// ------------------------------------------------------------------------ }}}

/// `u8` microsecond delays for every clock that provides the `u16` variant.
///
/// The argument is widened losslessly and handed to the clock-specific
/// `DelayUs<u16>` implementation.
impl<SPEED> delay_v0::DelayUs<u8> for Delay<SPEED>
where
    Delay<SPEED>: delay_v0::DelayUs<u16>,
{
    fn delay_us(&mut self, us: u8) {
        let widened = u16::from(us);
        <Self as delay_v0::DelayUs<u16>>::delay_us(self, widened);
    }
}

/// `u32` microsecond delays, built from repeated `u16` delays.
///
/// The request is split into `us >> 12` chunks of 4096 µs plus a remainder of
/// `us & 0xfff` µs, so the chunks and the remainder add up to exactly `us`.
/// (Waiting only 0xfff = 4095 µs per chunk, as an earlier revision did, drops
/// one microsecond for every 4096 requested.)
///
/// 4096 µs is small enough that none of the clock-specific `u16`
/// implementations in this file overflow when converting it to loop
/// iterations (the largest factor is x6, giving 24576).
impl<SPEED> delay_v0::DelayUs<u32> for Delay<SPEED>
where
    Delay<SPEED>: delay_v0::DelayUs<u16>,
{
    fn delay_us(&mut self, us: u32) {
        // Chunk size is a power of two so splitting is a shift and a mask;
        // a real division would be expensive on a CPU without a divider.
        const CHUNK_BITS: u32 = 12;
        const CHUNK_US: u16 = 1 << CHUNK_BITS; // 4096
        const REMAINDER_MASK: u32 = (1 << CHUNK_BITS) - 1; // 0xfff

        // TODO: Somehow fix the overhead induced by this loop
        // This was previously a range-based for loop, but that would
        // compile down to fairly poor code. This is slightly better,
        // but still has some overhead and may not lead to cycle-accurate
        // delays.
        let iters = us >> CHUNK_BITS;
        let mut i = 0;
        while i < iters {
            delay_v0::DelayUs::<u16>::delay_us(self, CHUNK_US);
            i += 1;
        }

        // The masked value is at most 0xfff, so the narrowing cast is lossless.
        delay_v0::DelayUs::<u16>::delay_us(self, (us & REMAINDER_MASK) as u16);
    }
}

/// `u16` millisecond delays, expressed through the `u32` microsecond delay.
impl<SPEED> delay_v0::DelayMs<u16> for Delay<SPEED>
where
    Delay<SPEED>: delay_v0::DelayUs<u32>,
{
    fn delay_ms(&mut self, ms: u16) {
        // At most 65_535 * 1000 = 65_535_000, which fits comfortably in u32.
        let micros = u32::from(ms) * 1000;
        <Self as delay_v0::DelayUs<u32>>::delay_us(self, micros);
    }
}

/// `u8` millisecond delays, forwarded to the `u16` millisecond delay after a
/// lossless widening.
impl<SPEED> delay_v0::DelayMs<u8> for Delay<SPEED>
where
    Delay<SPEED>: delay_v0::DelayMs<u16>,
{
    fn delay_ms(&mut self, ms: u8) {
        let widened = u16::from(ms);
        <Self as delay_v0::DelayMs<u16>>::delay_ms(self, widened);
    }
}

/// `embedded-hal` 1.0 delay trait, layered on the 0.2 microsecond delays.
impl<SPEED> DelayNs for Delay<SPEED>
where
    Delay<SPEED>: delay_v0::DelayUs<u16>,
{
    /// Waits for `ns` nanoseconds, rounded up to whole microseconds.
    fn delay_ns(&mut self, ns: u32) {
        // Quick-win initial implementation: the trait does not guarantee
        // nanosecond accuracy, so round up to the next microsecond and reuse
        // the microsecond delay.
        let micros = ns.div_ceil(1000);
        <Self as delay_v0::DelayUs<u32>>::delay_us(self, micros);
    }

    /// Waits for `us` microseconds using the `u32` microsecond delay.
    fn delay_us(&mut self, us: u32) {
        <Self as delay_v0::DelayUs<u32>>::delay_us(self, us);
    }
}