Skip to content

Commit e7fbfaa

Browse files
authored
Merge pull request #137 from gangmul12/deadlock_fix
Deadlock fix from stream_manager
2 parents 6a97d1e + f3ec233 commit e7fbfaa

File tree

3 files changed

+62
-42
lines changed

3 files changed

+62
-42
lines changed

libcuda/cuda_runtime_api.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1746,6 +1746,7 @@ __host__ cudaError_t CUDARTAPI cudaEventRecord(cudaEvent_t event, cudaStream_t s
17461746
if( !e ) return g_last_cudaError = cudaErrorUnknown;
17471747
struct CUstream_st *s = (struct CUstream_st *)stream;
17481748
stream_operation op(e,s);
1749+
e->issue();
17491750
g_stream_manager->push(op);
17501751
return g_last_cudaError = cudaSuccess;
17511752
}
@@ -1758,7 +1759,11 @@ __host__ cudaError_t CUDARTAPI cudaStreamWaitEvent(cudaStream_t stream, cudaEven
17581759
//reference: https://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/html/group__CUDART__STREAM_gfe68d207dc965685d92d3f03d77b0876.html
17591760
CUevent_st *e = get_event(event);
17601761
if( !e ){
1761-
printf("GPGPU-Sim API: Warning: cudaEventRecord has not been called on event before calling cudaStreamWaitEvent.\nNothing to be done.\n");
1762+
printf("GPGPU-Sim API: Error at cudaStreamWaitEvent. Event is not created .\n");
1763+
return g_last_cudaError = cudaErrorInvalidResourceHandle;
1764+
}
1765+
else if(e->num_issued() == 0){
1766+
printf("GPGPU-Sim API: Warning: cudaEventRecord has not been called on event before calling cudaStreamWaitEvent.\nNothing to be done.\n");
17621767
return g_last_cudaError = cudaSuccess;
17631768
}
17641769
if (!stream){

src/stream_manager.cc

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,14 +194,17 @@ bool stream_operation::do_operation( gpgpu_sim *gpu )
194194
m_stream->record_next_done();
195195
}
196196
break;
197-
case stream_wait_event: {
197+
case stream_wait_event:
198198
//only allows next op to go if event is done
199199
//otherwise stays in the stream queue
200200
printf("stream wait event processing...\n");
201-
if(m_event->done())
201+
if(m_event->num_updates()>=m_cnt){
202202
printf("stream wait event done\n");
203203
m_stream->record_next_done();
204204
}
205+
else{
206+
return false;
207+
}
205208
break;
206209
default:
207210
abort();
@@ -232,6 +235,7 @@ stream_manager::stream_manager( gpgpu_sim *gpu, bool cuda_launch_blocking )
232235
m_service_stream_zero = false;
233236
m_cuda_launch_blocking = cuda_launch_blocking;
234237
pthread_mutex_init(&m_lock,NULL);
238+
m_last_stream = m_streams.begin();
235239
}
236240

237241
bool stream_manager::operation( bool * sim)
@@ -330,11 +334,16 @@ stream_operation stream_manager::front()
330334
m_service_stream_zero = false;
331335
}
332336
}
333-
334337
if(!m_service_stream_zero)
335338
{
336-
std::list<struct CUstream_st*>::iterator s;
337-
for( s=m_streams.begin(); s != m_streams.end(); s++) {
339+
std::list<struct CUstream_st*>::iterator s = m_last_stream;
340+
if(m_last_stream == m_streams.end()){ s = m_streams.begin(); }
341+
else{ s++; }
342+
for(size_t ii = 0 ; ii < m_streams.size(); ii++, s++) {
343+
if(s == m_streams.end()){
344+
s = m_streams.begin();
345+
}
346+
m_last_stream = s;
338347
CUstream_st *stream = *s;
339348
if( !stream->busy() && !stream->empty() ) {
340349
result = stream->next();
@@ -371,6 +380,7 @@ void stream_manager::destroy_stream( CUstream_st *stream )
371380
}
372381
}
373382
delete stream;
383+
m_last_stream = m_streams.begin();
374384
pthread_mutex_unlock(&m_lock);
375385
}
376386

src/stream_manager.h

Lines changed: 41 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,45 @@
4343
// unsigned m_pending_streams;
4444
//};
4545

46+
// Simulator-side state behind a CUDA event handle.
//
// Two counters are tracked per event:
//   - m_issued:  bumped by issue(); cudaEventRecord calls it before pushing
//                the record operation onto the stream manager.
//   - m_updates: bumped by update(), together with the wall-clock time and
//                simulation cycle passed in by the caller.
// The event counts as done when both counters are equal. An event that has
// never been issued (0 == 0) is therefore reported as done.
//
// Both counters are unsigned so that done() and num_updates() never mix
// signed and unsigned values.
struct CUevent_st {
public:
   // blocking: stored as-is; nothing in this struct reads it back.
   // Each new event takes the next value of the shared uid counter.
   CUevent_st( bool blocking )
      : m_uid(++m_next_event_uid),
        m_blocking(blocking),
        m_updates(0),
        m_issued(0),
        m_wallclock(0),
        m_gpu_tot_sim_cycle(0)
   {
   }

   // Records one more update of the event and remembers when it happened.
   //   cycle: simulation cycle supplied by the caller.
   //   clk:   wall-clock time supplied by the caller.
   // NOTE(review): presumably invoked when a queued record operation is
   // processed by the stream -- confirm against the caller.
   void update( double cycle, time_t clk )
   {
      m_updates++;
      m_wallclock = clk;
      m_gpu_tot_sim_cycle = cycle;
   }

   // Unique id assigned at construction.
   int get_uid() const { return m_uid; }

   // Number of update() calls so far. A stream-wait operation snapshots
   // num_issued() when it is created and later compares this value against
   // that snapshot.
   unsigned num_updates() const { return m_updates; }

   // True when every issue() has been matched by an update().
   bool done() const { return m_updates == m_issued; }

   // Wall-clock time passed to the most recent update() (0 if none yet).
   time_t clock() const { return m_wallclock; }

   // Notes that one more record of this event has been requested.
   void issue() { m_issued++; }

   // Number of issue() calls so far.
   unsigned int num_issued() const { return m_issued; }

private:
   int m_uid;
   bool m_blocking;
   unsigned int m_updates;   // count of update() calls
   unsigned int m_issued;    // count of issue() calls
   time_t m_wallclock;
   double m_gpu_tot_sim_cycle;

   // Source of event uids; defined outside this struct.
   static int m_next_event_uid;
};
83+
84+
4685
enum stream_operation_type {
4786
stream_no_op,
4887
stream_memcpy_host_to_device,
@@ -107,6 +146,7 @@ class stream_operation {
107146
m_kernel=NULL;
108147
m_type=stream_wait_event;
109148
m_event=e;
149+
m_cnt = m_event->num_issued();
110150
m_stream=stream;
111151
m_done=false;
112152
}
@@ -163,7 +203,6 @@ class stream_operation {
163203
void print( FILE *fp ) const;
164204
struct CUstream_st *get_stream() { return m_stream; }
165205
void set_stream( CUstream_st *stream ) { m_stream = stream; }
166-
167206
private:
168207
struct CUstream_st *m_stream;
169208

@@ -183,41 +222,6 @@ class stream_operation {
183222
kernel_info_t *m_kernel;
184223
struct CUevent_st *m_event;
185224
};
186-
187-
struct CUevent_st {
188-
public:
189-
CUevent_st( bool blocking )
190-
{
191-
m_uid = ++m_next_event_uid;
192-
m_blocking = blocking;
193-
m_updates = 0;
194-
m_wallclock = 0;
195-
m_gpu_tot_sim_cycle = 0;
196-
m_done = false;
197-
}
198-
void update( double cycle, time_t clk )
199-
{
200-
m_updates++;
201-
m_wallclock=clk;
202-
m_gpu_tot_sim_cycle=cycle;
203-
m_done = true;
204-
}
205-
//void set_done() { assert(!m_done); m_done=true; }
206-
int get_uid() const { return m_uid; }
207-
unsigned num_updates() const { return m_updates; }
208-
bool done() const { return m_done; }
209-
time_t clock() const { return m_wallclock; }
210-
private:
211-
int m_uid;
212-
bool m_blocking;
213-
bool m_done;
214-
int m_updates;
215-
time_t m_wallclock;
216-
double m_gpu_tot_sim_cycle;
217-
218-
static int m_next_event_uid;
219-
};
220-
221225
struct CUstream_st {
222226
public:
223227
CUstream_st();
@@ -268,6 +272,7 @@ class stream_manager {
268272
CUstream_st m_stream_zero;
269273
bool m_service_stream_zero;
270274
pthread_mutex_t m_lock;
275+
std::list<struct CUstream_st*>::iterator m_last_stream;
271276
};
272277

273278
#endif

0 commit comments

Comments
 (0)