基本思想:通过 schedule(static|dynamic|guided|runtime[, size]) 子句控制 for 循环的迭代在各线程之间的任务分担方式(其中 runtime 不能带 size 参数)
(1)for的任务分担
#pragma omp parallel
{
#pragma omp for
for(int i=0;i<num/2;i++)//num此为偶数
{
.....
}
#pragma omp for
for(int i=num/2;i<num;i++)
{
.......
}
}
测试代码
#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;
// Baseline without any OpenMP directive: emits `num` lines, one per pass,
// each carrying the value omp_get_thread_num() reports to the caller.
// printf is used instead of std::cout (presumably so each line is produced by
// a single call - confirm if the stream variant is ever restored).
void sequentialProgram(int num)
{
    int printed = 0;
    while (printed < num)
    {
        const int tid = omp_get_thread_num();
        printf("%s the current thread id: %d\n", "hello world", tid);
        ++printed;
    }
}
// Emits `num` lines from inside one parallel region using two work-sharing
// loops: the first covers [0, num/2) and tags its lines "A", the second
// covers [num/2, num) and tags its lines "B".
// Neither loop has a nowait clause, so the threads meet at the implicit
// barrier that ends the first `omp for` before any of them starts the second.
void parallelProgram(int num)
{
    const int half = num / 2;
#pragma omp parallel
    {
#pragma omp for
        for (int first = 0; first < half; ++first)
        {
            const int tid = omp_get_thread_num();
            printf("%s the current thread id: %d\n", "A hello world", tid);
        }
#pragma omp for
        for (int second = half; second < num; ++second)
        {
            const int tid = omp_get_thread_num();
            printf("%s the current thread id: %d\n", "B hello world", tid);
        }
    }
}
// Runs the sequential and the parallel variant once each with
// num = 2 * omp_get_num_procs() iterations and reports the wall-clock time of
// each run in seconds, measured with std::chrono::steady_clock.
int main()
{
    using Clock = std::chrono::steady_clock;
    using Seconds = std::chrono::duration<double>;

    const int num = 2 * omp_get_num_procs();

    const auto seqBegin = Clock::now();
    sequentialProgram(num);
    const Seconds seqElapsed = Clock::now() - seqBegin;
    std::cout << "sequentialProgram elapse time: " << seqElapsed.count() << " seconds" << std::endl;

    const auto parBegin = Clock::now();
    parallelProgram(num);
    const Seconds parElapsed = Clock::now() - parBegin;
    std::cout << "parallelProgram elapse time: " << parElapsed.count() << " seconds" << std::endl;

    return 0;
}
测试结果如下。这里是在同一个并行域中,对多个for循环分别使用 #pragma omp for 制导指令进行任务分担;这种手工拆分可以用下面介绍的 schedule 调度子句来简化
F:\OpenMP\cmake-build-debug\OpenMP.exe
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
sequentialProgram elapse time: 0.0776085 seconds
A hello world the current thread id: 1
A hello world the current thread id: 0
A hello world the current thread id: 3
A hello world the current thread id: 5
A hello world the current thread id: 7
A hello world the current thread id: 10
A hello world the current thread id: 9
A hello world the current thread id: 8
A hello world the current thread id: 2
A hello world the current thread id: 4
A hello world the current thread id: 6
A hello world the current thread id: 11
B hello world the current thread id: 1
B hello world the current thread id: 0
B hello world the current thread id: 7
B hello world the current thread id: 9
B hello world the current thread id: 2
B hello world the current thread id: 6
B hello world the current thread id: 4
B hello world the current thread id: 10
B hello world the current thread id: 3
B hello world the current thread id: 8
B hello world the current thread id: 5
B hello world the current thread id: 11
parallelProgram elapse time: 0.0527985 seconds
Process finished with exit code 0
(2)使用for的调度指令schedule
#pragma omp parallel for schedule(static|dynamic|guided|runtime[, size])
for (int i = 0; i < num; i++)
{
.......
}
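当写成
#pragma omp parallel for
在常见的实现中通常等价于(标准规定:不写 schedule 子句时的调度方式由实现定义)
#pragma omp parallel for schedule(static)
当线程数等于 omp_get_num_procs() 且 num 能被线程数整除时,其分配结果又与下面的写法相同
#pragma omp parallel for schedule(static,num/omp_get_num_procs()) // num=omp_get_num_procs()*2;
其中static 表示静态调度:迭代按块以轮转方式固定地分配给各线程,它并不设置线程的数量
其中size 为可选项,表示每一块包含的迭代次数;static 不设置size参数时,迭代被划分为大小近似相等的块,每个线程最多分到一块(本例中即每块 num/omp_get_num_procs() 次迭代);dynamic 和 guided 不设置size时默认为1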
测试代码
#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;
// Baseline without any OpenMP directive: emits one "i=<index>" line for each
// index in [0, num), together with the value omp_get_thread_num() reports to
// the caller.
void sequentialProgram(int num)
{
    int idx = 0;
    while (idx < num)
    {
        const int tid = omp_get_thread_num();
        printf("i=%d the current thread id: %d\n", idx, tid);
        ++idx;
    }
}
// Emits one "i=<index>" line per iteration of a parallel loop that uses
// static scheduling with a chunk size of 2.
// The two commented-out directives are the alternatives this demo compares
// against; mind the value chosen for num when testing them.
// NOTE(review): the three directives presumably only produce the same
// assignment when num is twice the number of threads, and the schedule used
// without a clause is implementation-defined - confirm on the target compiler.
void parallelProgram(int num)
{
//#pragma omp parallel for
//#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 2)
    for (int idx = 0; idx < num; ++idx)
    {
        const int tid = omp_get_thread_num();
        printf("i=%d the current thread id: %d\n", idx, tid);
    }
}
// Runs the sequential and the parallel variant once each with
// num = 2 * omp_get_num_procs() iterations and reports the wall-clock time of
// each run in seconds, measured with std::chrono::steady_clock.
int main()
{
    using Clock = std::chrono::steady_clock;
    using Seconds = std::chrono::duration<double>;

    const int num = 2 * omp_get_num_procs();

    const auto seqBegin = Clock::now();
    sequentialProgram(num);
    const Seconds seqElapsed = Clock::now() - seqBegin;
    std::cout << "sequentialProgram elapse time: " << seqElapsed.count() << " seconds" << std::endl;

    const auto parBegin = Clock::now();
    parallelProgram(num);
    const Seconds parElapsed = Clock::now() - parBegin;
    std::cout << "parallelProgram elapse time: " << parElapsed.count() << " seconds" << std::endl;

    return 0;
}
测试结果是相同的
F:\OpenMP\cmake-build-debug\OpenMP.exe
i=0 the current thread id: 0
i=1 the current thread id: 0
i=2 the current thread id: 0
i=3 the current thread id: 0
i=4 the current thread id: 0
i=5 the current thread id: 0
i=6 the current thread id: 0
i=7 the current thread id: 0
i=8 the current thread id: 0
i=9 the current thread id: 0
i=10 the current thread id: 0
i=11 the current thread id: 0
i=12 the current thread id: 0
i=13 the current thread id: 0
i=14 the current thread id: 0
i=15 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
i=19 the current thread id: 0
i=20 the current thread id: 0
i=21 the current thread id: 0
i=22 the current thread id: 0
i=23 the current thread id: 0
sequentialProgram elapse time: 0.0422739 seconds
i=0 the current thread id: 0
i=1 the current thread id: 0
i=4 the current thread id: 2
i=5 the current thread id: 2
i=14 the current thread id: 7
i=15 the current thread id: 7
i=18 the current thread id: 9
i=19 the current thread id: 9
i=16 the current thread id: 8
i=17 the current thread id: 8
i=12 the current thread id: 6
i=13 the current thread id: 6
i=2 the current thread id: 1
i=3 the current thread id: 1
i=10 the current thread id: 5
i=11 the current thread id: 5
i=6 the current thread id: 3
i=7 the current thread id: 3
i=8 the current thread id: 4
i=9 the current thread id: 4
i=22 the current thread id: 11
i=23 the current thread id: 11
i=20 the current thread id: 10
i=21 the current thread id: 10
parallelProgram elapse time: 0.0412098 seconds
Process finished with exit code 0
(3)虽然参数static能够均衡地分担任务,但是各线程的处理速度可能存在差异,因此引入dynamic:每个线程处理完当前的块之后,再去领取下一个尚未分配的块
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < num; i++) {
......
}
测试代码
#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;
// Baseline without any OpenMP directive: emits one "i=<index>" line for each
// index in [0, num), together with the value omp_get_thread_num() reports to
// the caller.
void sequentialProgram(int num)
{
    int idx = 0;
    while (idx < num)
    {
        const int tid = omp_get_thread_num();
        printf("i=%d the current thread id: %d\n", idx, tid);
        ++idx;
    }
}
// Emits one "i=<index>" line per iteration of a parallel loop that requests
// dynamic scheduling without an explicit chunk size, so which thread prints
// which index can differ from run to run.
void parallelProgram(int num)
{
#pragma omp parallel for schedule(dynamic)
    for (int idx = 0; idx < num; ++idx)
    {
        const int tid = omp_get_thread_num();
        printf("i=%d the current thread id: %d\n", idx, tid);
    }
}
// Runs the sequential and the parallel variant once each with
// num = 2 * omp_get_num_procs() iterations and reports the wall-clock time of
// each run in seconds, measured with std::chrono::steady_clock.
int main()
{
    using Clock = std::chrono::steady_clock;
    using Seconds = std::chrono::duration<double>;

    const int num = 2 * omp_get_num_procs();

    const auto seqBegin = Clock::now();
    sequentialProgram(num);
    const Seconds seqElapsed = Clock::now() - seqBegin;
    std::cout << "sequentialProgram elapse time: " << seqElapsed.count() << " seconds" << std::endl;

    const auto parBegin = Clock::now();
    parallelProgram(num);
    const Seconds parElapsed = Clock::now() - parBegin;
    std::cout << "parallelProgram elapse time: " << parElapsed.count() << " seconds" << std::endl;

    return 0;
}
测试结果可以看出,线程id=9处理速度较快,因此承担了更多的任务;当然也可以使用size来指定线程每次领取的迭代次数(即块大小)~
F:\OpenMP\cmake-build-debug\OpenMP.exe
i=0 the current thread id: 0
i=1 the current thread id: 0
i=2 the current thread id: 0
i=3 the current thread id: 0
i=4 the current thread id: 0
i=5 the current thread id: 0
i=6 the current thread id: 0
i=7 the current thread id: 0
i=8 the current thread id: 0
i=9 the current thread id: 0
i=10 the current thread id: 0
i=11 the current thread id: 0
i=12 the current thread id: 0
i=13 the current thread id: 0
i=14 the current thread id: 0
i=15 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
i=19 the current thread id: 0
i=20 the current thread id: 0
i=21 the current thread id: 0
i=22 the current thread id: 0
i=23 the current thread id: 0
sequentialProgram elapse time: 0.041236 seconds
i=0 the current thread id: 2
i=6 the current thread id: 9
i=13 the current thread id: 9
i=14 the current thread id: 9
i=15 the current thread id: 9
i=16 the current thread id: 9
i=17 the current thread id: 9
i=18 the current thread id: 9
i=19 the current thread id: 9
i=20 the current thread id: 9
i=21 the current thread id: 9
i=22 the current thread id: 9
i=23 the current thread id: 9
i=5 the current thread id: 11
i=3 the current thread id: 1
i=4 the current thread id: 8
i=7 the current thread id: 4
i=1 the current thread id: 10
i=2 the current thread id: 3
i=8 the current thread id: 0
i=9 the current thread id: 6
i=10 the current thread id: 7
i=11 the current thread id: 5
i=12 the current thread id: 2
parallelProgram elapse time: 0.0399313 seconds
Process finished with exit code 0
(4)guided 采用启发式的自适应调度算法:开始时分配较大的块,块的大小与"剩余迭代次数/线程数"成正比并逐渐变小,但不会小于size(最后一块除外);如果没有设置size,块的大小最小会减小到1
#pragma omp parallel for schedule(guided)
for (int i = 0; i < num; i++) {
.....
}
测试代码
#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;
// Baseline without any OpenMP directive: emits one "i=<index>" line for each
// index in [0, num), together with the value omp_get_thread_num() reports to
// the caller.
void sequentialProgram(int num)
{
    int idx = 0;
    while (idx < num)
    {
        const int tid = omp_get_thread_num();
        printf("i=%d the current thread id: %d\n", idx, tid);
        ++idx;
    }
}
// Emits one "i=<index>" line per iteration of a parallel loop that requests
// guided scheduling without an explicit chunk size.
void parallelProgram(int num)
{
#pragma omp parallel for schedule(guided)
    for (int idx = 0; idx < num; ++idx)
    {
        const int tid = omp_get_thread_num();
        printf("i=%d the current thread id: %d\n", idx, tid);
    }
}
// Runs the sequential and the parallel variant once each and reports the
// wall-clock time of each run in seconds, measured with
// std::chrono::steady_clock. The iteration count is
// 2 * omp_get_num_procs() - 5, i.e. not a multiple of the processor count.
int main()
{
    using Clock = std::chrono::steady_clock;
    using Seconds = std::chrono::duration<double>;

    const int num = 2 * omp_get_num_procs() - 5;

    const auto seqBegin = Clock::now();
    sequentialProgram(num);
    const Seconds seqElapsed = Clock::now() - seqBegin;
    std::cout << "sequentialProgram elapse time: " << seqElapsed.count() << " seconds" << std::endl;

    const auto parBegin = Clock::now();
    parallelProgram(num);
    const Seconds parElapsed = Clock::now() - parBegin;
    std::cout << "parallelProgram elapse time: " << parElapsed.count() << " seconds" << std::endl;

    return 0;
}
测试结果:开始时每次分配给线程的块包含两次迭代,随后块的大小减小为每次只分配一次迭代
F:\OpenMP\cmake-build-debug\OpenMP.exe
i=0 the current thread id: 0
i=1 the current thread id: 0
i=2 the current thread id: 0
i=3 the current thread id: 0
i=4 the current thread id: 0
i=5 the current thread id: 0
i=6 the current thread id: 0
i=7 the current thread id: 0
i=8 the current thread id: 0
i=9 the current thread id: 0
i=10 the current thread id: 0
i=11 the current thread id: 0
i=12 the current thread id: 0
i=13 the current thread id: 0
i=14 the current thread id: 0
i=15 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
sequentialProgram elapse time: 0.033042 seconds
i=0 the current thread id: 0
i=1 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
i=6 the current thread id: 5
i=7 the current thread id: 5
i=2 the current thread id: 3
i=3 the current thread id: 3
i=13 the current thread id: 6
i=14 the current thread id: 2
i=15 the current thread id: 11
i=10 the current thread id: 8
i=12 the current thread id: 9
i=8 the current thread id: 7
i=9 the current thread id: 1
i=11 the current thread id: 10
i=4 the current thread id: 4
i=5 the current thread id: 4
parallelProgram elapse time: 0.0334159 seconds
Process finished with exit code 0
(5)runtime 表示把调度方式推迟到运行时决定:由环境变量 OMP_SCHEDULE 或 omp_set_schedule() 设置的值来选择上述三种方式中的一种;未设置时的默认方式由实现定义。我测试了一下,在我的环境中好像每次都是以dynamic 的方式调用~~
#pragma omp parallel for schedule(runtime)
for (int i = 0; i < num; i++) {
......
}
测试代码
#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;
// Baseline without any OpenMP directive: emits one "i=<index>" line for each
// index in [0, num), together with the value omp_get_thread_num() reports to
// the caller.
void sequentialProgram(int num)
{
    int idx = 0;
    while (idx < num)
    {
        const int tid = omp_get_thread_num();
        printf("i=%d the current thread id: %d\n", idx, tid);
        ++idx;
    }
}
// Emits one "i=<index>" line per iteration of a parallel loop whose schedule
// is left to be chosen at run time via schedule(runtime).
void parallelProgram(int num)
{
#pragma omp parallel for schedule(runtime)
    for (int idx = 0; idx < num; ++idx)
    {
        const int tid = omp_get_thread_num();
        printf("i=%d the current thread id: %d\n", idx, tid);
    }
}
// Runs the sequential and the parallel variant once each with
// num = 2 * omp_get_num_procs() iterations and reports the wall-clock time of
// each run in seconds, measured with std::chrono::steady_clock.
int main()
{
    using Clock = std::chrono::steady_clock;
    using Seconds = std::chrono::duration<double>;

    const int num = 2 * omp_get_num_procs();

    const auto seqBegin = Clock::now();
    sequentialProgram(num);
    const Seconds seqElapsed = Clock::now() - seqBegin;
    std::cout << "sequentialProgram elapse time: " << seqElapsed.count() << " seconds" << std::endl;

    const auto parBegin = Clock::now();
    parallelProgram(num);
    const Seconds parElapsed = Clock::now() - parBegin;
    std::cout << "parallelProgram elapse time: " << parElapsed.count() << " seconds" << std::endl;

    return 0;
}
测试结果
F:\OpenMP\cmake-build-debug\OpenMP.exe
标签:runtime,thread,schedule,current,num,0i,world,任务调度,id From: https://blog.51cto.com/u_12504263/5719059
i=0 the current thread id: 0
i=1 the current thread id: 0
i=2 the current thread id: 0
i=3 the current thread id: 0
i=4 the current thread id: 0
i=5 the current thread id: 0
i=6 the current thread id: 0
i=7 the current thread id: 0
i=8 the current thread id: 0
i=9 the current thread id: 0
i=10 the current thread id: 0
i=11 the current thread id: 0
i=12 the current thread id: 0
i=13 the current thread id: 0
i=14 the current thread id: 0
i=15 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
i=19 the current thread id: 0
i=20 the current thread id: 0
i=21 the current thread id: 0
i=22 the current thread id: 0
i=23 the current thread id: 0
sequentialProgram elapse time: 0.0410057 seconds
i=0 the current thread id: 1
i=8 the current thread id: 9
i=13 the current thread id: 9
i=14 the current thread id: 9
i=15 the current thread id: 9
i=16 the current thread id: 9
i=17 the current thread id: 9
i=18 the current thread id: 9
i=19 the current thread id: 9
i=20 the current thread id: 9
i=21 the current thread id: 9
i=22 the current thread id: 9
i=23 the current thread id: 9
i=6 the current thread id: 2
i=5 the current thread id: 8
i=7 the current thread id: 11
i=3 the current thread id: 10
i=4 the current thread id: 3
i=2 the current thread id: 4
i=1 the current thread id: 7
i=9 the current thread id: 0
i=10 the current thread id: 6
i=11 the current thread id: 5
i=12 the current thread id: 1
parallelProgram elapse time: 0.042588 seconds
Process finished with exit code 0