Pierre Olivier
Virtual address 0x42 for both processes is mapped to a different physical location through the page table.

We can run 2 independent processes in parallel on a multicore, but how to parallelise a single program?

Threads communicate using shared memory.
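To illustrate the shared-memory point, here is a minimal sketch (not from the original slides; the array name `shared` and the two-thread setup are just for the example) of threads exchanging data through a global variable:

#include <stdio.h>
#include <pthread.h>

int shared[2]; /* global array: every thread in the process sees the same memory */

void *worker(void *arg) {
    int id = (int)(long)arg;
    shared[id] = id * 10; /* write into the shared array */
    return NULL;
}

int main(void) {
    pthread_t t[2];
    for (int i = 0; i < 2; i++)
        pthread_create(&t[i], NULL, worker, (void *)(long)i);
    for (int i = 0; i < 2; i++)
        pthread_join(t[i], NULL);
    /* main reads the values the workers wrote: communication through shared memory */
    printf("shared[0] = %d, shared[1] = %d\n", shared[0], shared[1]);
    return 0;
}

Compile with gcc and -lpthread, like the pthread example below.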
- pthread_create() to create and launch a thread
- pthread_exit() to have the calling thread exit
- pthread_join() to wait for another thread to finish

See man pthread_* and Google "pthreads" for lots of documentation. To read the pthread man pages, first install the relevant development man pages:
sudo apt install manpages-dev manpages-posix manpages-posix-dev
// Compile with:
// gcc pthread.c -o pthread -lpthread
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h> // pthread header

#define NOWORKERS 5

// Function executed by all threads
void *thread_fn(void *arg) {
    int id = (int)(long)arg;
    printf("Thread %d running\n", id);
    pthread_exit(NULL); // exit
    // never reached
}

int main(void) {
    // Each thread is controlled through a
    // pthread_t data structure
    pthread_t workers[NOWORKERS];

    // Create and launch the threads
    for(int i=0; i<NOWORKERS; i++)
        if(pthread_create(&workers[i], NULL, thread_fn, (void *)(long)i)) {
            perror("pthread_create");
            return -1;
        }

    // Wait for threads to finish
    for (int i = 0; i < NOWORKERS; i++)
        if(pthread_join(workers[i], NULL)) {
            perror("pthread_join");
            return -1;
        }

    printf("All done\n");
}
// Create a class that inherits from Thread
class MyThread extends Thread {
    int id;

    MyThread(int id) { this.id = id; }

    // Override the method run() to define the code executed by the thread:
    public void run() {
        System.out.println("Thread " + id + " running");
    }
}

class Main {
    public static void main(String[] args) {
        int NOWORKERS = 5;
        MyThread[] threads = new MyThread[NOWORKERS];

        for (int i = 0; i < NOWORKERS; i++)
            threads[i] = new MyThread(i);

        for (int i = 0; i < NOWORKERS; i++)
            threads[i].start(); // start executing the threads

        for (int i = 0; i < NOWORKERS; i++)
            try {
                threads[i].join();
            } catch (InterruptedException e) { /* do nothing */ }

        System.out.println("All done");
    }
}

// compile and launch with:
// javac java-thread.java && java Main
03-shared-memory-programming/java-thread.java
#!/usr/bin/python3
import threading

NOWORKERS = 5

def print_hello(id):
    print("Thread " + str(id) + " running")

threads = []
for i in range(NOWORKERS):
    thread = threading.Thread(target=print_hello, args=(i,))
    threads.append(thread)
    thread.start()

for thread in threads:
    thread.join()

print("All done.")
03-shared-memory-programming/python.py
// With Rust toolchain installed, compile with:
// rustc <source file>
use std::thread;

const NOWORKERS: u32 = 5;

fn print_hello(id: u32) {
    println!("Thread {} running", id);
}

fn main() {
    let mut handles = vec![];
    for i in 0..NOWORKERS {
        // `move ||` transfers ownership of i inside the closure
        let handle = thread::spawn(move || {
            print_hello(i);
        });
        handles.push(handle);
    }
    for handle in handles {
        // unwrap panics if the thread's execution returns an error
        handle.join().unwrap();
    }
    println!("All done.");
}
03-shared-memory-programming/rust.rs
For all examples:

Thread 1 running
Thread 0 running
Thread 2 running
Thread 4 running
Thread 3 running
All done

A possible scheduling scenario:

Another one on 1 core:
#define N 1000
int A[N][N]; int B[N][N]; int C[N][N];

int main(int argc, char **argv) {
    /* init matrices here */
    for(int i=0; i<N; i++)
        for(int j=0; j<N; j++) {
            C[i][j] = 0;
            for(int k=0; k<N; k++)
                C[i][j] += A[i][k] * B[k][j];
        }
    return 0;
}
#include <omp.h>
#define N 1000
int A[N][N]; int B[N][N]; int C[N][N];

int main(int argc, char **argv) {
    /* init matrices here */

    /* First (outermost) loop parallelised: iterations over i are
       distributed across the OpenMP threads */
#pragma omp parallel for
    for(int i=0; i<N; i++)
        for(int j=0; j<N; j++) {
            C[i][j] = 0;
            for(int k=0; k<N; k++)
                C[i][j] += A[i][k] * B[k][j];
        }
    return 0;
}
gcc openmp.c -o prog -fopenmp && OMP_NUM_THREADS=8 ./prog
Parallelising this with pthreads would require rewriting much more code.
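To make that concrete, here is a rough sketch (assumptions not in the slides: a static row partitioning and a helper struct range_t) of what the same matrix multiplication looks like when parallelised by hand with pthreads:

#include <pthread.h>
#define N 1000
#define NTHREADS 8

int A[N][N]; int B[N][N]; int C[N][N];

typedef struct { int start; int end; } range_t; /* rows handled by one thread */

void *mm_worker(void *arg) {
    range_t *r = (range_t *)arg;
    for (int i = r->start; i < r->end; i++)
        for (int j = 0; j < N; j++) {
            C[i][j] = 0;
            for (int k = 0; k < N; k++)
                C[i][j] += A[i][k] * B[k][j];
        }
    return NULL;
}

int main(int argc, char **argv) {
    /* init matrices here */
    pthread_t threads[NTHREADS];
    range_t ranges[NTHREADS];
    int chunk = N / NTHREADS;

    /* create one thread per row range */
    for (int t = 0; t < NTHREADS; t++) {
        ranges[t].start = t * chunk;
        ranges[t].end = (t == NTHREADS - 1) ? N : (t + 1) * chunk;
        pthread_create(&threads[t], NULL, mm_worker, &ranges[t]);
    }
    for (int t = 0; t < NTHREADS; t++)
        pthread_join(threads[t], NULL);
    return 0;
}

The work partitioning, argument passing, and thread management are all explicit, whereas OpenMP reduces them to a single pragma.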
Fortran 77 sequential array addition:
DO I = 1, N
  DO J = 1, N
    C(J, I) = A(J, I) + B(J, I)
  END DO
END DO
In Fortran 90 with implicit parallelism:
C = A + B
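For comparison, here is a hedged sketch (not from the slides) of the same whole-array addition written with explicit parallelism in C and OpenMP; this is roughly the work a Fortran 90 compiler can do implicitly for C = A + B:

#include <omp.h>
#define N 1000
int A[N][N]; int B[N][N]; int C[N][N];

int main(void) {
    /* init matrices here */
    /* every (i, j) element is independent, so the loop nest parallelises trivially */
#pragma omp parallel for collapse(2)
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            C[i][j] = A[i][j] + B[i][j];
    return 0;
}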
for (int i = 0 ; i < n-3 ; i++) {
    a[i] = a[i+3] + b[i]; // at iteration i, read dependency with index i+3
}

for (int i = 5 ; i < n ; i++) {
    a[i] += a[i-5] * 2; // at iteration i, read dependency with index i-5
}

for (int i = 0 ; i < n ; i++) {
    a[i] = a[i + j] + 1; // at iteration i, read dependency with index ???
}
for (int i=0; i<n-3; i++)
    a[i] = a[i+3] + b[i];

The dependency can be removed by writing the results into a new array:

parallel_for (int i=0; i<n-3; i++)
    new_a[i] = a[i+3] + b[i];
a = new_a;
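As a concrete illustration (a sketch under assumptions not in the slides: OpenMP, a helper function shift_add, and a caller-provided temporary array), the parallel_for pseudocode above could be written as:

#include <stdio.h>
#include <string.h>

/* Each iteration writes only to new_a, so no iteration can overwrite an
 * element of a that another iteration still needs to read. */
void shift_add(int *a, int *new_a, const int *b, int n) {
#pragma omp parallel for
    for (int i = 0; i < n - 3; i++)
        new_a[i] = a[i + 3] + b[i];
    memcpy(a, new_a, (n - 3) * sizeof(int)); /* the "a = new_a" step */
}

int main(void) {
    int a[8] = {0, 1, 2, 3, 4, 5, 6, 7}, b[8] = {0}, new_a[8];
    shift_add(a, new_a, b, 8);
    for (int i = 0; i < 8; i++)
        printf("%d ", a[i]); /* prints: 3 4 5 6 7 5 6 7 */
    printf("\n");
    return 0;
}

Compile with gcc -fopenmp.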
for (int i = 5 ; i < n ; i++) {
    a[i] += a[i-5] * 2;
}
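Here a[i] depends on a[i-5], a value produced 5 iterations earlier, so the iterations cannot simply run in any order. One way to still expose parallelism (an illustration, not necessarily the approach discussed in the lecture) is to observe that indices sharing the same value of i mod 5 form 5 independent chains, which can be processed by different threads:

#include <stdio.h>

int main(void) {
    int n = 20, a[20];
    for (int i = 0; i < n; i++)
        a[i] = 1;

    /* a[i] only touches a[i] and a[i-5], which are in the same residue class
     * mod 5, so the 5 chains never interfere with each other. */
#pragma omp parallel for
    for (int r = 0; r < 5; r++)
        for (int i = 5 + r; i < n; i += 5) /* sequential within a chain */
            a[i] += a[i - 5] * 2;

    for (int i = 0; i < n; i++)
        printf("%d ", a[i]); /* 1 (x5), 3 (x5), 7 (x5), 15 (x5) */
    printf("\n");
    return 0;
}

Compile with gcc -fopenmp; without the flag the pragma is ignored and the result is the same, just computed sequentially.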