使用MPI 实现奇偶排序
0号进程获得待排序序列并输出排序好的序列
使用文件进行输入输出
进行性能测试与对比
代码
奇偶排序
头文件引入
#include <iostream>
#include <algorithm>
#include <mpi.h>
#include <fstream>
#include <chrono>
定义规模
#define N 100000000
int I[N];
计算partner
int Compute_partner(int phase, int my_rank, int comm_sz) {
int partner;
if (phase % 2 == 0) { // 偶数阶段
if (my_rank % 2 != 0) {
partner = my_rank - 1;
}
else {
partner = my_rank + 1;
}
}
else { // 奇数阶段
if (my_rank % 2 != 0) {
partner = my_rank + 1;
}
else {
partner = my_rank - 1;
}
}
if (partner < 0 || partner >= comm_sz) {
partner = -1; // No valid partner
}
return partner;
}
保留小一半数据
void Keep_smaller_keys(int* A, int* B, int n) {
int* tmp = new int[n];
int a = 0, b = 0, t = 0;
while (t < n) {
if (A[a] < B[b]) {
tmp[t] = A[a];
t++;
a++;
}
else {
tmp[t] = B[b];
t++;
b++;
}
}
for (int i = 0; i < n; i++) {
A[i] = tmp[i];
}
}
保留大一半数据
void Keep_larger_keys(int* A, int* B, int n) {
int* tmp = new int[n];
int a = n - 1, b = n - 1, t = n - 1;
while (t >= 0) {
if (A[a] < B[b]) {
tmp[t] = B[b];
t--;
b--;
}
else {
tmp[t] = A[a];
t--;
a--;
}
}
for (int i = 0; i < n; i++) {
A[i] = tmp[i];
}
}
主函数
int main() {
int* A, * B;
int comm_sz, my_rank;
int n;
MPI_Init(NULL, NULL);
auto start = std::chrono::high_resolution_clock::now();
auto finish = std::chrono::high_resolution_clock::now();
MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
n = N / comm_sz;
A = new int[n];
B = new int[n];
//文件读入
if (my_rank == 0) {
ifstream input("input.txt");
for (int i = 0; i < N && input >> I[i]; i++);
input.close();
start = std::chrono::high_resolution_clock::now();
}
MPI_Scatter(I, n, MPI_INT, A, n, MPI_INT, 0, MPI_COMM_WORLD);
sort(A, A + n);
for (int phase = 0; phase < comm_sz; phase++) {
int partner = Compute_partner(phase, my_rank, comm_sz);
if (partner != -1) {
MPI_Sendrecv(A, n, MPI_INT, partner, 0, B, n, MPI_INT, partner, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
if (my_rank < partner) {
Keep_smaller_keys(A, B, n);
}
else {
Keep_larger_keys(A, B, n);
}
}
}
MPI_Gather(A, n, MPI_INT, I, n, MPI_INT, 0, MPI_COMM_WORLD);
//文件输出
if (my_rank == 0) {
finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
std::cout << "Elapsed time: " << elapsed.count() << " s\n" << std::endl;
ofstream output("output.txt");
for (int elem : I) {
output << elem << " ";
}
output << endl;
output.close();
}
MPI_Finalize();
return 0;
}
随机数序列生成
使用python生成一个包含100000000个数字的文本文件
import random
def generate_random_numbers(count):
# Generate random numbers and write to a file
with open(f'input{count}.txt', 'w') as file:
for _ in range(count):
number = random.randint(1, 10*count) # Random numbers between 1 and 100
file.write(f"{number}\n")
if __name__ == "__main__":
generate_random_numbers(pow(10,8))
测试代码
#define _CRT_SECURE_NO_WARNINGS
#include<iostream>
#include<fstream>
#include<algorithm>
#include<Windows.h>
#include<chrono>
using namespace std;
#define N 100000000
int I1[N], I2[N];
int main() {
ifstream input1("C:\\Users\\Xavier\\Documents\\并行\\Project2\\Project2\\input.txt");
for (int i = 0; i < N && input1 >> I1[i]; i++);
input1.close();
ifstream input2("C:\\Users\\Xavier\\Documents\\并行\\Project2\\Project2\\output.txt");
for (int i = 0; i < N && input2 >> I2[i]; i++);
input2.close();
auto start = std::chrono::high_resolution_clock::now();
sort(I1, I1 + N);
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
std::cout << "Elapsed time: " << elapsed.count() << " s\n" << std::endl;
Sleep(5000);
bool is_success = true;
for (int i = 0; i < N; i++) {
if (I1[i] == I2[i]) {
if (i % (N/10) == 0) {
// Update progress bar in the first line
HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
COORD pos = { 0, 0 }; // Go back to the start of the first line
SetConsoleCursorPosition(hConsole, pos);
printf("测试中[%.2lf%%]: ", i * 100.0 / N);
int show_num = (i + 1) * 20 / N;
for (int j = 0; j < show_num; j++) {
cout << "=";
}
for (int j = show_num; j < 20; j++) {
cout << " ";
}
cout << "|" << endl;
cout << i + 1 << "st Number Pass" << endl;
Sleep(100); // Adjust speed of the update
}
}
else {
is_success = false;
cout << "No " << i + 1 << " failed" << endl;
cout << I1[i] << " " << I2[i] << endl;
break;
}
}
if (is_success) {
cout << "Success" << endl;
}
else {
cout << "Test failed." << endl;
}
return 0;
}
可视化代码
利用matplot进行可视化
import matplotlib.pyplot as plt
import json
with open('test.json') as f:
data = json.load(f)
# Extracting data for plotting
powers = [item['pow'] for item in data]
oddeven_values = [item['oddeven'] * 1000 for item in data] # Convert to milliseconds
std_values = [item['std'] * 1000 for item in data] # Convert to milliseconds
# Number of groups
n_groups = len(powers)
# Create plot
fig, ax = plt.subplots()
index = range(n_groups)
bar_width = 0.35
opacity = 0.8
rects1 = ax.bar(index, oddeven_values, bar_width, alpha=opacity, color='b', label='OddEven (ms)')
rects2 = ax.bar([p + bar_width for p in index], std_values, bar_width, alpha=opacity, color='r', label='Std (ms)')
ax.set_xlabel('Log(Number of elements)')
ax.set_ylabel('Time (ms)')
ax.set_title('Execution time comparison')
ax.set_yscale('log') # Set logarithmic scale
ax.set_xticks([p + bar_width / 2 for p in index])
ax.set_xticklabels(powers)
ax.legend()
plt.tight_layout()
plt.show()
结果
因为我的电脑的CPU是I7-12700H,只有20个物理核心,所以在执行MPI并行程序时将线程设置为16。将16线程奇偶排序与串行Std::Sort在不同数据规模下用相同数据集进行测试,得到的结果如下(具体的数值记录在test.json文件中):
对于较小的数据集(小于等于 1 0 4 10^4 104个数字),并行奇偶排序的执行效率不如Std::Sort,但是随着数据规模增加,并行奇偶排序的优势开始体现,对于超过 1 0 5 10^5 105个数字的数据集,并行奇偶排序的效率明显更高,在数据规模来到 1 0 8 10^8 108时,16线程并行奇偶排序的用时是5.43495s,而串行Std::Sort的用时是42.4069s。随着规模增大,并行奇偶排序的优势将不断增加,并行特性使其能够更高效地处理较大的数据集。
遇到的问题及解决
C2131表达式的计算结果不是常数
vs2022禁止如下语句:
int a[n]; //n为变量
我们可以使用动态内存分配解决:
int *a=new int[n];
使用普通计时器精度过低
在数据规模较小时使用普通计时器无法记录程序运行时间,采用高精度计时器:
#include <chrono>
auto start = std::chrono::high_resolution_clock::now();
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
std::cout << "Elapsed time: " << elapsed.count() << " s\n"<<std::endl;
有未经处理的异常: 0xC00000FD: Stack overflow
爆栈了,可以在vs2022中手动设置堆栈保留大小或者使用动态内存分配
C2148: 数组的总大小不得超过0x7fffffff字节
在测试 1 0 9 10^9 109数据时报错,因此最终仅对 1 0 8 10^8 108规模的数据做测试
标签:std,奇偶,int,rank,MPI,partner,排序,my From: https://blog.csdn.net/qq_42426268/article/details/139791657