Skip to content
Snippets Groups Projects

Problem of inlining nvshmemx_collective_launch usages

  • Clone with SSH
  • Clone with HTTPS
  • Embed
  • Share
    The snippet can be accessed without any authentication.
    Authored by Semih Burak

    From nvshmem-collectiveLaunch-example.cu, currently I get nvshmem-collectiveLaunch-example.mlir, by applying:

    clangFlags="-O0 $S -I$CPATH -fno-exceptions -mllvm --emit-mlir -mllvm --transformer-enable -mllvm \
    --transformer-pre-merge-mlir-pipeline="canonicalize,convert-llvm-to-cf,canonicalize,convert-llvm-to-arith,canonicalize" "  
     $clang $clangFlags $inputPath -o $outputFile
    Edited
    nvshmem-collectiveLaunch-example.cu 2.87 KiB
    // RACE LABELS BEGIN
    /*
    {
        "RACE_KIND": "local",
        "ACCESS_SET": ["local buffer write","load"],
        "RACE_PAIR": ["nvshmem_int_get_nbi@36","LOAD@38"],
        "NPROCS": 2,
        "CONSISTENCY_CALLS": ["nvshmem_barrier_all"],
        "SYNC_CALLS": ["nvshmem_barrier_all"],
        "DESCRIPTION": "Two conflicting operations get_nbi and load which are not correctly separated by an nvshmem_barrier_all leading to a local race. The shmem_quiet should be in between the two conflicting operations to be correct."
    }
    */
    // RACE LABELS END
    
    #include <cuda.h>
    #include <nvshmem.h>
    #include <nvshmemx.h>
    #include <stdio.h>
    
    // Number of processing elements
    #define PROC_NUM 2
    
    __global__ void nvshmem_kernel(int *remote, int *localbuf) {
        // Initialize memory
        *remote = 0;
        *localbuf = 1;
        int x = 0;
    
        int my_pe = nvshmem_my_pe();
    
        // Synchronize across all PEs
        nvshmem_barrier_all();    
    
        if (my_pe == 0) {
            // CONFLICT
            nvshmem_int_get_nbi(localbuf, remote, 1, 1);
            // CONFLICT
            x = *localbuf;
            nvshmem_barrier_all();
        } else {
            nvshmem_barrier_all();
        }
        *remote += x;
    }
    
    
    int main(int argc, char **argv) {
        int remote, localbuf;
    
        // Initialize NVSHMEM
        nvshmem_init();
        int mype_node = nvshmem_team_my_pe(NVSHMEMX_TEAM_NODE);
        cudaSetDevice(mype_node);
    
        // Get the number of PEs and the current PE's rank
        int my_pe = nvshmem_my_pe();
        int num_pe = nvshmem_n_pes();
        // Ensure the required number of PEs
        if (num_pe != PROC_NUM) {
            printf("Got %d PEs, expected %d\n", num_pe, PROC_NUM);
            nvshmem_global_exit(1);
        }
    
        // Allocate symmetric memory on the device
        int *remote_d = (int *)nvshmem_malloc(sizeof(int));
        int *localbuf_d = (int *)nvshmem_malloc(sizeof(int));
    
        // Step 3: Allocate shared memory across PEs
        size_t shared_data_size = 0 * sizeof(int);
    
        // Step 4: Define kernel execution parameters
        void *args[] = {remote_d, localbuf_d};  // Kernel arguments
        dim3 blocks(1);
        dim3 threads(1);
    
        // Launch kernel collectively across all PEs
        nvshmemx_collective_launch((void *)nvshmem_kernel, blocks, threads, args, shared_data_size, 0);
    
        // Copy data back to host
        cudaMemcpy(&remote, remote_d, sizeof(int), cudaMemcpyDeviceToHost);
        cudaMemcpy(&localbuf, localbuf_d, sizeof(int), cudaMemcpyDeviceToHost);
    
        // Synchronize
        nvshmem_barrier_all();
    
        printf("PE %d: localbuf = %d, remote = %d\n", my_pe, localbuf, remote);
    
        // Synchronize again
        nvshmem_barrier_all();
    
        printf("Process %d: Execution finished, variable contents: remote = %d, localbuf = %d\n", my_pe, remote, localbuf);
    
        // Free NVSHMEM symmetric memory
        nvshmem_free(remote_d);
        nvshmem_free(localbuf_d);
    
        // Finalize NVSHMEM
        nvshmem_finalize();
    
        return 0;
    }
    nvshmem-collectiveLaunch-example.mlir 35.21 KiB
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Please register or to comment