//! Events can be used to track status and dependencies, as well as to measure
//! the duration of work submitted to a CUDA stream.
//!
//! In CUDA, most work is performed asynchronously. Events help to manage tasks
//! scheduled on an asynchronous stream. This includes waiting for one or more
//! tasks to complete and measuring the time a task takes to complete. Events can
//! also be used to sequence tasks on multiple streams within the same context by
//! specifying dependent tasks (not yet supported by RustaCUDA).
//!
//! Events may be reused multiple times.
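//!
//! # Example
//!
//! The example below is a minimal sketch of the typical timing workflow; in a
//! real program, work would be submitted to `stream` between the two `record`
//! calls.
//!
//! ```
//! # use rustacuda::quick_init;
//! # use rustacuda::stream::{Stream, StreamFlags};
//! # use std::error::Error;
//! # fn main() -> Result<(), Box<dyn Error>> {
//! # let _context = quick_init()?;
//! use rustacuda::event::{Event, EventFlags};
//!
//! let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
//! let start = Event::new(EventFlags::DEFAULT)?;
//! let stop = Event::new(EventFlags::DEFAULT)?;
//!
//! // Mark the start, submit work to `stream`, then mark the end.
//! start.record(&stream)?;
//! // ... launch kernels or copies on `stream` here ...
//! stop.record(&stream)?;
//!
//! // Wait for everything before `stop` to finish, then read the elapsed time.
//! stop.synchronize()?;
//! let millis = stop.elapsed_time_f32(&start)?;
//! # let _ = millis;
//! # Ok(())
//! # }
//! ```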

// TODO: I'm not sure that these events are/can be safe by Rust's model of safety; they inherently
// create state which can be mutated even while an immutable borrow is held.

use crate::cuda_sys::cuda::{
    cuEventCreate, cuEventDestroy_v2, cuEventElapsedTime, cuEventQuery, cuEventRecord,
    cuEventSynchronize, CUevent,
};
use crate::error::{CudaError, CudaResult, DropResult, ToResult};
use crate::stream::Stream;

use std::mem;
use std::ptr;

bitflags! {
    /// Bit flags for configuring a CUDA Event.
    ///
    /// Per the CUDA documentation, events created with `DISABLE_TIMING` set and
    /// `BLOCKING_SYNC` not set provide the best performance when used with
    /// `query()` and `stream.wait_event()`.
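    ///
    /// # Example
    ///
    /// A minimal sketch creating an event intended only for synchronization
    /// (assuming a context created with `quick_init`):
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// // An event used only for ordering/synchronization; timing is disabled.
    /// let flags = EventFlags::BLOCKING_SYNC | EventFlags::DISABLE_TIMING;
    /// let _event = Event::new(flags)?;
    /// # Ok(())
    /// # }
    /// ```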
    pub struct EventFlags: u32 {
        /// The default event creation flag.
        const DEFAULT = 0x0;

        /// Specify that the created event should use blocking synchronization:
        /// a thread that waits on the event will sleep instead of busy-waiting.
        const BLOCKING_SYNC = 0x1;

        /// Specify that the created event does not need to record timing data.
        const DISABLE_TIMING = 0x2;

        /// Specify that the created event may be used as an interprocess event.
        /// (not supported yet by RustaCUDA). This flag requires
        /// `DISABLE_TIMING` to be set as well.
        const INTERPROCESS = 0x4;
    }
}

/// Enum that represents the current status of an event.
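///
/// # Example
///
/// A minimal sketch of polling an event with `query` instead of blocking in
/// `synchronize` (assuming a context created with `quick_init`):
///
/// ```
/// # use rustacuda::quick_init;
/// # use rustacuda::stream::{Stream, StreamFlags};
/// # use std::error::Error;
/// # fn main() -> Result<(), Box<dyn Error>> {
/// # let _context = quick_init()?;
/// use rustacuda::event::{Event, EventFlags, EventStatus};
///
/// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
/// let event = Event::new(EventFlags::DEFAULT)?;
/// event.record(&stream)?;
///
/// // Poll until the event completes, yielding the CPU in between.
/// loop {
///     match event.query()? {
///         EventStatus::Ready => break,
///         EventStatus::NotReady => std::thread::yield_now(),
///     }
/// }
/// # Ok(())
/// # }
/// ```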
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum EventStatus {
    /// Ready indicates that all work captured by the event has been completed.
    ///
    /// The CUDA documentation states that for Unified Memory, `EventStatus::Ready` is
    /// equivalent to having called `Event::synchronize`.
    Ready,

    /// `EventStatus::NotReady` indicates that the work captured by the event is still
    /// incomplete.
    NotReady,
}

/// An event to track work submitted to a stream.
///
/// See the module-level documentation for more information.
#[derive(Debug)]
pub struct Event(CUevent);

impl Event {
    /// Create a new event with the specified flags.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// // With default settings
    /// let event = Event::new(EventFlags::DEFAULT)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn new(flags: EventFlags) -> CudaResult<Self> {
        unsafe {
            let mut event: CUevent = mem::zeroed();
            cuEventCreate(&mut event, flags.bits()).to_result()?;
            Ok(Event(event))
        }
    }

    /// Add the event to the given stream of work. The event will be completed when the stream
    /// completes all previously-submitted work and reaches the event in the queue.
    ///
    /// This function is used together with `query`, `synchronize`, and
    /// `elapsed_time_f32`. See the respective functions for more information.
    ///
    /// Note that `record` is asynchronous: use `query` or `synchronize` to
    /// determine when the event has actually been recorded.
    ///
    /// # Errors
    ///
    /// If the event and stream are not from the same context, an error is
    /// returned.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use rustacuda::stream::{Stream, StreamFlags};
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
    /// let event = Event::new(EventFlags::DEFAULT)?;
    ///
    /// // submit some work ...
    ///
    /// event.record(&stream)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn record(&self, stream: &Stream) -> CudaResult<()> {
        unsafe {
            cuEventRecord(self.0, stream.as_inner()).to_result()?;
            Ok(())
        }
    }

    /// Return whether the stream this event was recorded on (see `record`) has reached the event
    /// yet. A return value of `EventStatus::Ready` indicates that all work submitted before the
    /// event has completed.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use rustacuda::stream::{Stream, StreamFlags};
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags, EventStatus};
    ///
    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
    /// let event = Event::new(EventFlags::DEFAULT)?;
    ///
    /// // do some work ...
    ///
    /// // record an event
    /// event.record(&stream)?;
    ///
    /// // ... wait some time ...
    /// # event.synchronize()?;
    ///
    /// // query if the work is finished
    /// let status = event.query()?;
    /// assert_eq!(status, EventStatus::Ready);
    /// # Ok(())
    /// # }
    /// ```
    pub fn query(&self) -> CudaResult<EventStatus> {
        let result = unsafe { cuEventQuery(self.0).to_result() };

        match result {
            Ok(()) => Ok(EventStatus::Ready),
            Err(CudaError::NotReady) => Ok(EventStatus::NotReady),
            Err(other) => Err(other),
        }
    }

    /// Wait for an event to complete.
    ///
    /// Blocks thread execution until all work submitted before the event was
    /// recorded has completed. `EventFlags::BLOCKING_SYNC` controls the mode of
    /// blocking. If the flag is set on event creation, the thread will sleep.
    /// Otherwise, the thread will busy-wait.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use rustacuda::stream::{Stream, StreamFlags};
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
    /// let event = Event::new(EventFlags::DEFAULT)?;
    ///
    /// // do some work ...
    ///
    /// // record an event
    /// event.record(&stream)?;
    ///
    /// // wait until the work is finished
    /// event.synchronize()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn synchronize(&self) -> CudaResult<()> {
        unsafe {
            cuEventSynchronize(self.0).to_result()?;
            Ok(())
        }
    }

    /// Return the duration between two events.
    ///
    /// The duration is computed in milliseconds with a resolution of
    /// approximately 0.5 microseconds. This can be used to measure the duration of work
    /// queued in between the two events.
    ///
    /// # Errors
    ///
    /// `CudaError::NotReady` is returned if either event is not yet complete.
    ///
    /// `CudaError::InvalidHandle` is returned if
    /// - the two events are not from the same context, or if
    /// - `record` has not been called on either event, or if
    /// - the `DISABLE_TIMING` flag is set on either event.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use rustacuda::stream::{Stream, StreamFlags};
    /// # use rustacuda::launch;
    /// # use rustacuda::module::Module;
    /// # use rustacuda::memory::DeviceBox;
    /// # use std::error::Error;
    /// # use std::ffi::CString;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// # let module_data = CString::new(include_str!("../resources/add.ptx"))?;
    /// # let module = Module::load_from_string(&module_data)?;
    /// # let mut x = DeviceBox::new(&10.0f32)?;
    /// # let mut y = DeviceBox::new(&20.0f32)?;
    /// # let mut result = DeviceBox::new(&0.0f32)?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
    /// let start_event = Event::new(EventFlags::DEFAULT)?;
    /// let stop_event = Event::new(EventFlags::DEFAULT)?;
    ///
    /// // start recording time
    /// start_event.record(&stream)?;
    ///
    /// // do some work ...
    /// # unsafe {
    /// #    launch!(module.sum<<<1, 1, 0, stream>>>(
    /// #            x.as_device_ptr(),
    /// #            y.as_device_ptr(),
    /// #            result.as_device_ptr(),
    /// #            1 // Length
    /// #            ))?;
    /// # }
    ///
    /// // stop recording time
    /// stop_event.record(&stream)?;
    ///
    /// // wait for the work to complete
    /// stop_event.synchronize()?;
    ///
    /// // compute the time elapsed between the start and stop events
    /// let time = stop_event.elapsed_time_f32(&start_event)?;
    ///
    /// # assert!(time > 0.0);
    /// # Ok(())
    /// # }
    /// ```
    pub fn elapsed_time_f32(&self, start: &Self) -> CudaResult<f32> {
        unsafe {
            let mut millis: f32 = 0.0;
            cuEventElapsedTime(&mut millis, start.0, self.0).to_result()?;
            Ok(millis)
        }
    }

    // Get the inner `CUevent` from the `Event`.
    //
    // Necessary for certain CUDA functions outside of this
    // module that expect a bare `CUevent`.
    pub(crate) fn as_inner(&self) -> CUevent {
        self.0
    }

    /// Destroy an `Event`, returning an error if one occurs.
    ///
    /// Destroying an event can return errors from previous asynchronous work.
    /// On failure, this function returns both the error and the un-destroyed
    /// event, so the caller can inspect or retry the operation.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::*;
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// let event = Event::new(EventFlags::DEFAULT)?;
    /// match Event::drop(event) {
    ///     Ok(()) => println!("Successfully destroyed"),
    ///     Err((cuda_error, event)) => {
    ///         println!("Failed to destroy event: {:?}", cuda_error);
    ///         // Do something with event
    ///     },
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn drop(mut event: Event) -> DropResult<Event> {
        if event.0.is_null() {
            return Ok(());
        }

        unsafe {
            let inner = mem::replace(&mut event.0, ptr::null_mut());
            match cuEventDestroy_v2(inner).to_result() {
                Ok(()) => {
                    mem::forget(event);
                    Ok(())
                }
                Err(e) => Err((e, Event(inner))),
            }
        }
    }
}

impl Drop for Event {
    fn drop(&mut self) {
        // The inner pointer is null if ownership of the raw handle was already
        // taken by `Event::drop`; in that case there is nothing left to destroy.
        if self.0.is_null() {
            return;
        }

        unsafe {
            let inner = mem::replace(&mut self.0, ptr::null_mut());
            cuEventDestroy_v2(inner)
                .to_result()
                .expect("Failed to destroy CUDA event");
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::quick_init;
    use crate::stream::StreamFlags;
    use std::error::Error;

    #[test]
    fn test_new_with_flags() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let _event = Event::new(EventFlags::BLOCKING_SYNC | EventFlags::DISABLE_TIMING)?;
        Ok(())
    }

    #[test]
    fn test_record_with_wrong_context() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let _new_context = quick_init()?;
        let event = Event::new(EventFlags::DEFAULT)?;
        let result = event.record(&stream);
        assert_eq!(result, Err(CudaError::InvalidHandle));
        Ok(())
    }

    #[test]
    fn test_elapsed_time_f32_with_wrong_context() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let fst_event = Event::new(EventFlags::DEFAULT)?;
        fst_event.record(&fst_stream)?;

        let _context = quick_init()?;
        let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let snd_event = Event::new(EventFlags::DEFAULT)?;
        snd_event.record(&snd_stream)?;

        fst_event.synchronize()?;
        snd_event.synchronize()?;
        let result = snd_event.elapsed_time_f32(&fst_event);
        assert_eq!(result, Err(CudaError::InvalidHandle));
        Ok(())
    }

    #[test]
    fn test_elapsed_time_f32_with_different_streams() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let fst_event = Event::new(EventFlags::DEFAULT)?;
        fst_event.record(&fst_stream)?;

        let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let snd_event = Event::new(EventFlags::DEFAULT)?;
        snd_event.record(&snd_stream)?;

        fst_event.synchronize()?;
        snd_event.synchronize()?;
        let _result = snd_event.elapsed_time_f32(&fst_event)?;
        Ok(())
    }

    #[test]
    fn test_elapsed_time_f32_with_disable_timing() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;

        let start_event = Event::new(EventFlags::DISABLE_TIMING)?;
        start_event.record(&stream)?;

        let stop_event = Event::new(EventFlags::DEFAULT)?;
        stop_event.record(&stream)?;

        stop_event.synchronize()?;
        let result = stop_event.elapsed_time_f32(&start_event);
        assert_eq!(result, Err(CudaError::InvalidHandle));
        Ok(())
    }
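
    // A minimal sanity check (a sketch): explicitly dropping an unused event
    // via `Event::drop` is expected to succeed.
    #[test]
    fn test_explicit_drop() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let event = Event::new(EventFlags::DEFAULT)?;
        match Event::drop(event) {
            Ok(()) => Ok(()),
            Err((cuda_error, _event)) => Err(cuda_error.into()),
        }
    }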
}