Performance and Debugging
This final tutorial covers performance measurement, debugging techniques, and error handling for Argobots applications. These skills are essential for optimizing and troubleshooting Mochi services.
Performance Measurement
Argobots provides timing functions for performance analysis:
1/*
2 * Performance measurement with Argobots timing functions
3 */
4
5#include <stdio.h>
6#include <abt.h>
7
8#define NUM_ITERATIONS 1000
9#define NUM_THREADS 4
10
/*
 * Synthetic CPU-bound workload executed by each ULT in the timing example.
 *
 * arg is accepted only so the function matches the void (*)(void *)
 * work-unit signature; it is not read.
 */
void computation_work(void *arg)
{
    enum { WORK_ITEMS = 100000 };

    /* 0 + 1 + ... + 99999 is roughly 5e9, which overflows a 32-bit long,
     * so accumulate in long long. volatile forces every addition to be
     * performed, keeping the loop from being optimized away. */
    volatile long long sum = 0;

    (void)arg;

    for (long i = 0; i < WORK_ITEMS; i++) {
        sum += i;
    }
}
20
21int main(int argc, char **argv)
22{
23 ABT_xstream xstream;
24 ABT_pool pool;
25 ABT_thread threads[NUM_THREADS];
26 ABT_timer timer;
27 int thread_ids[NUM_THREADS];
28 double start_time, end_time, elapsed;
29
30 ABT_init(argc, argv);
31
32 printf("=== Performance Measurement ===\n\n");
33
34 ABT_xstream_self(&xstream);
35 ABT_xstream_get_main_pools(xstream, 1, &pool);
36
37 /* Method 1: Using ABT_get_wtime() */
38 printf("Method 1: ABT_get_wtime()\n");
39 start_time = ABT_get_wtime();
40
41 for (int iter = 0; iter < NUM_ITERATIONS; iter++) {
42 for (int i = 0; i < NUM_THREADS; i++) {
43 thread_ids[i] = i;
44 ABT_thread_create(pool, computation_work, &thread_ids[i],
45 ABT_THREAD_ATTR_NULL, &threads[i]);
46 }
47 for (int i = 0; i < NUM_THREADS; i++) {
48 ABT_thread_free(&threads[i]);
49 }
50 }
51
52 end_time = ABT_get_wtime();
53 elapsed = end_time - start_time;
54 printf(" Total time: %.6f seconds\n", elapsed);
55 printf(" Per iteration: %.6f ms\n", (elapsed / NUM_ITERATIONS) * 1000);
56 printf(" Per thread: %.6f us\n\n", (elapsed / (NUM_ITERATIONS * NUM_THREADS)) * 1000000);
57
58 /* Method 2: Using ABT_timer */
59 printf("Method 2: ABT_timer\n");
60 ABT_timer_create(&timer);
61
62 ABT_timer_start(timer);
63
64 for (int iter = 0; iter < NUM_ITERATIONS; iter++) {
65 for (int i = 0; i < NUM_THREADS; i++) {
66 thread_ids[i] = i;
67 ABT_thread_create(pool, computation_work, &thread_ids[i],
68 ABT_THREAD_ATTR_NULL, &threads[i]);
69 }
70 for (int i = 0; i < NUM_THREADS; i++) {
71 ABT_thread_free(&threads[i]);
72 }
73 }
74
75 ABT_timer_stop(timer);
76 ABT_timer_read(timer, &elapsed);
77
78 printf(" Total time: %.6f seconds\n", elapsed);
79 printf(" Per iteration: %.6f ms\n", (elapsed / NUM_ITERATIONS) * 1000);
80 printf(" Per thread: %.6f us\n\n", (elapsed / (NUM_ITERATIONS * NUM_THREADS)) * 1000000);
81
82 ABT_timer_free(&timer);
83
84 printf("Use timing to identify performance bottlenecks\n");
85
86 ABT_finalize();
87 return 0;
88}
Two Timing Methods:
ABT_get_wtime() (Method 1 in the listing above): simple wall-clock time. It is lightweight, has minimal overhead, and returns the current time in seconds as a double. Use it for quick measurements.
ABT_timer (Method 2 in the listing above): a timer object with start, stop, and read operations. It can be reused multiple times; it has slightly more overhead but is cleaner for repeated measurements.
- Performance Metrics:
Total execution time
Per-iteration time
Per-work-unit time
Throughput (operations/second)
Debugging with Info Functions
Argobots provides introspection APIs for debugging:
1/*
2 * Debugging with Argobots info functions
3 */
4
5#include <stdio.h>
6#include <abt.h>
7
8#define NUM_XSTREAMS 2
9#define NUM_THREADS 3
10
/*
 * ULT entry point for the info-function example.
 *
 * arg points to the int identifier of this worker; the function only
 * reads it and prints a one-line message.
 */
void worker_thread(void *arg)
{
    const int *id_ptr = arg;

    printf("Worker %d executing\n", *id_ptr);
}
16
17int main(int argc, char **argv)
18{
19 ABT_xstream xstreams[NUM_XSTREAMS];
20 ABT_thread threads[NUM_THREADS];
21 int thread_ids[NUM_THREADS];
22
23 ABT_init(argc, argv);
24
25 printf("=== Debugging with Info Functions ===\n\n");
26
27 /* Print Argobots configuration */
28 printf("1. Argobots Configuration:\n");
29 ABT_info_print_config(stdout);
30 printf("\n");
31
32 /* Create additional xstream */
33 ABT_xstream_self(&xstreams[0]);
34 ABT_xstream_create(ABT_SCHED_NULL, &xstreams[1]);
35
36 /* Create threads on primary xstream */
37 ABT_pool pool;
38 ABT_xstream_get_main_pools(xstreams[0], 1, &pool);
39
40 for (int i = 0; i < NUM_THREADS; i++) {
41 thread_ids[i] = i;
42 ABT_thread_create(pool, worker_thread, &thread_ids[i],
43 ABT_THREAD_ATTR_NULL, &threads[i]);
44 }
45
46 /* Print all execution streams */
47 printf("2. All Execution Streams:\n");
48 ABT_info_print_all_xstreams(stdout);
49 printf("\n");
50
51 /* Print specific thread information */
52 printf("3. Thread Information:\n");
53 for (int i = 0; i < NUM_THREADS; i++) {
54 ABT_thread_state state;
55 ABT_thread_get_state(threads[i], &state);
56 printf(" Thread %d state: %d\n", i, state);
57 }
58 printf("\n");
59
60 /* Wait for threads */
61 for (int i = 0; i < NUM_THREADS; i++) {
62 ABT_thread_free(&threads[i]);
63 }
64
65 /* Cleanup */
66 ABT_xstream_join(xstreams[1]);
67 ABT_xstream_free(&xstreams[1]);
68
69 printf("Info functions help debug Argobots applications\n");
70
71 ABT_finalize();
72 return 0;
73}
Key Info Functions:
ABT_info_print_config(): Print Argobots configuration
ABT_info_print_all_xstreams(): Print all execution streams
ABT_thread_get_state(): Query thread state
- Thread States:
ABT_THREAD_STATE_READY: In pool, ready to run
ABT_THREAD_STATE_RUNNING: Currently executing
ABT_THREAD_STATE_BLOCKED: Waiting on synchronization
ABT_THREAD_STATE_TERMINATED: Finished execution
Error Handling
Proper error handling improves robustness:
1/*
2 * Proper error handling in Argobots
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <abt.h>
8
9void check_error(int ret, const char *msg)
10{
11 if (ret != ABT_SUCCESS) {
12 char *err_str;
13 size_t len;
14 ABT_error_get_str(ret, NULL, &len);
15 err_str = (char *)malloc(len);
16 ABT_error_get_str(ret, err_str, &len);
17 fprintf(stderr, "Error in %s: %s\n", msg, err_str);
18 free(err_str);
19 ABT_finalize();
20 exit(1);
21 }
22}
23
24int main(int argc, char **argv)
25{
26 int ret;
27 ABT_xstream xstream;
28 ABT_thread thread;
29 ABT_pool pool;
30
31 printf("=== Error Handling Example ===\n\n");
32
33 /* Initialize with error checking */
34 ret = ABT_init(argc, argv);
35 check_error(ret, "ABT_init");
36 printf("Argobots initialized successfully\n");
37
38 /* Get primary execution stream */
39 ret = ABT_xstream_self(&xstream);
40 check_error(ret, "ABT_xstream_self");
41
42 ret = ABT_xstream_get_main_pools(xstream, 1, &pool);
43 check_error(ret, "ABT_xstream_get_main_pools");
44
45 /* Try to create invalid thread (NULL function) */
46 printf("\nAttempting invalid operation (NULL function)...\n");
47 ret = ABT_thread_create(pool, NULL, NULL, ABT_THREAD_ATTR_NULL, &thread);
48
49 if (ret != ABT_SUCCESS) {
50 printf("Expected error occurred:\n");
51 char *err_str;
52 size_t len;
53 ABT_error_get_str(ret, NULL, &len);
54 err_str = (char *)malloc(len);
55 ABT_error_get_str(ret, err_str, &len);
56 printf(" Error code: %d\n", ret);
57 printf(" Error message: %s\n", err_str);
58 free(err_str);
59 }
60
61 printf("\nAlways check return values for robust error handling\n");
62
63 ABT_finalize();
64 return 0;
65}
- Error Handling Pattern (see check_error() in the listing above):
int ret = ABT_some_function(...);
if (ret != ABT_SUCCESS) {
    ABT_error_get_str(ret, err_str, &len);
    /* Handle error */
}
- Common Error Codes:
ABT_SUCCESS: Operation succeeded
ABT_ERR_INV_ARG: Invalid argument
ABT_ERR_INV_XSTREAM: Invalid execution stream
ABT_ERR_INV_POOL: Invalid pool
ABT_ERR_INV_THREAD: Invalid thread
etc.
Common Performance Bottlenecks
- 1. Too Many ULTs
Creating millions of ULTs overwhelms the system. Solution: Batch work, or consider reducing stack sizes for lightweight ULTs.
- 2. Fine-Grained Synchronization
Excessive locking/unlocking. Solution: Batch operations, use lock-free structures where possible.
- 3. Poor Work Distribution
Load imbalance across execution streams. Solution: Use work-stealing schedulers.
- 4. Excessive Context Switching
Too frequent yielding. Solution: Reduce yield frequency, batch work.
- 5. Synchronization Overhead
Wrong synchronization primitive. Solution: Use appropriate primitive (eventual vs barrier vs future vs mutex…).
Debugging Strategies
- Deadlock Debugging:
Use ABT_info_print_all_xstreams() to see thread states
Check for circular dependencies in locks/futures
Verify all work units can make progress
- Performance Debugging:
Measure with ABT_timer
Identify bottleneck operations
Profile different scheduler types
Check pool utilization
- Memory Debugging:
Use valgrind to check for leaks
Ensure all create calls have matching free calls
Check thread-local storage cleanup
Common Issues:
Forgetting to free work units: Memory leak
Double-freeing work units: Crash
Mismatched barrier count: Deadlock
Not checking return values: Silent failures
Profiling Tools
- External Tools:
valgrind: Memory leak detection
perf: CPU profiling
gprof: Call graph profiling
Intel VTune: Advanced performance analysis
- Argobots-Specific:
Built-in timers for work-unit execution
Info functions for state inspection
Custom instrumentation in schedulers
Best Practices
- Performance:
Configure appropriate stack sizes for ULTs based on workload
Use work-stealing for unbalanced workloads
Batch small operations
Minimize synchronization overhead
Profile before optimizing
- Debugging:
Always check return values
Use info functions during development
Add assertions for invariants
Test with different schedulers
Start simple, add complexity gradually
- Error Handling:
Check all Argobots API return values
Provide meaningful error messages
Clean up resources on error paths
Use helper functions for repetitive checks
API Reference
- Timing Functions:
double ABT_get_wtime(void)
int ABT_timer_create(ABT_timer *newtimer)
int ABT_timer_start(ABT_timer timer)
int ABT_timer_stop(ABT_timer timer)
int ABT_timer_read(ABT_timer timer, double *secs)
int ABT_timer_free(ABT_timer *timer)
- Info Functions:
int ABT_info_print_config(FILE *fp)
int ABT_info_print_all_xstreams(FILE *fp)
int ABT_info_print_xstream(FILE *fp, ABT_xstream xstream)
int ABT_info_print_pool(FILE *fp, ABT_pool pool)
int ABT_info_print_thread(FILE *fp, ABT_thread thread)
- Error Functions:
int ABT_error_get_str(int err, char *str, size_t *len)