Исправление vtVitus (текущая версия):
а чё банального не хватает?
// Advances each of the m_count entries of x by its dir_x step.
// When an entry is at or beyond the 0..640 bounds, its step is negated
// (and stored back into dir_x) before being applied.
void BallsManager::processX()
{
    static const float lower_bound_x = 0.0f;
    static const float upper_bound_x = 640.0f;
    // Each index touches only its own x[idx] / dir_x[idx], so the loop is
    // split across OpenMP threads.
#pragma omp parallel for
    for (size_t idx = 0; idx < m_count; ++idx)
    {
        const float pos = x[idx];
        const bool at_or_past_bound = pos <= lower_bound_x || pos >= upper_bound_x;
        if (at_or_past_bound)
        {
            // Reverse the step first; the reversed step is the one applied below.
            dir_x[idx] = -dir_x[idx];
        }
        x[idx] = pos + dir_x[idx];
    }
}
// Advances each of the m_count entries of y by its dir_y step.
// When an entry is at or beyond the 0..480 bounds, its step is negated
// (and stored back into dir_y) before being applied.
void BallsManager::processY()
{
    static const float lower_bound_y = 0.0f;
    static const float upper_bound_y = 480.0f;
    // Each index touches only its own y[idx] / dir_y[idx], so the loop is
    // split across OpenMP threads.
#pragma omp parallel for
    for (size_t idx = 0; idx < m_count; ++idx)
    {
        const float pos = y[idx];
        const bool at_or_past_bound = pos <= lower_bound_y || pos >= upper_bound_y;
        if (at_or_past_bound)
        {
            // Reverse the step first; the reversed step is the one applied below.
            dir_y[idx] = -dir_y[idx];
        }
        y[idx] = pos + dir_y[idx];
    }
}
// Benchmark driver: builds a BallsManager with argument 15000 (presumably the
// ball count -- confirm against the constructor) and runs one million
// processX() steps followed by one million processY() steps.
int main()
{
    BallsManager bm(15000);
    //-- need to speedup from here --
    const int itSize = 1000000;
    // The step loops must run sequentially: every call reads the positions and
    // directions written by the previous call, so running calls concurrently on
    // the same object is a data race and yields nondeterministic results.
    // The per-element loops inside processX()/processY() are already
    // parallelised with OpenMP, so wrapping these loops in another
    // "parallel for" would also just nest parallel regions.
    for (int iterations = 0; iterations < itSize; ++iterations) {
        bm.processX();
    }
    for (int iterations = 0; iterations < itSize; ++iterations) {
        bm.processY();
    }
    //-- to here --
    return 0;
}
g++ -O2 mod.cc -fopenmp -o mod
В 16-процессорной виртуалке — где-то в 12 раз быстрее +)
Исходная версия vtVitus:
а чё банального не хватает?
// Advances each of the m_count entries of x by its dir_x step.
// When an entry is at or beyond the 0..640 bounds, its step is negated
// (and stored back into dir_x) before being applied.
void BallsManager::processX()
{
    static const float lower_bound_x = 0.0f;
    static const float upper_bound_x = 640.0f;
    // Each index touches only its own x[idx] / dir_x[idx], so the loop is
    // split across OpenMP threads.
#pragma omp parallel for
    for (size_t idx = 0; idx < m_count; ++idx)
    {
        const float pos = x[idx];
        const bool at_or_past_bound = pos <= lower_bound_x || pos >= upper_bound_x;
        if (at_or_past_bound)
        {
            // Reverse the step first; the reversed step is the one applied below.
            dir_x[idx] = -dir_x[idx];
        }
        x[idx] = pos + dir_x[idx];
    }
}
// Advances each of the m_count entries of y by its dir_y step.
// When an entry is at or beyond the 0..480 bounds, its step is negated
// (and stored back into dir_y) before being applied.
void BallsManager::processY()
{
    static const float lower_bound_y = 0.0f;
    static const float upper_bound_y = 480.0f;
    // Each index touches only its own y[idx] / dir_y[idx], so the loop is
    // split across OpenMP threads.
#pragma omp parallel for
    for (size_t idx = 0; idx < m_count; ++idx)
    {
        const float pos = y[idx];
        const bool at_or_past_bound = pos <= lower_bound_y || pos >= upper_bound_y;
        if (at_or_past_bound)
        {
            // Reverse the step first; the reversed step is the one applied below.
            dir_y[idx] = -dir_y[idx];
        }
        y[idx] = pos + dir_y[idx];
    }
}
// Benchmark driver: builds a BallsManager with argument 15000 (presumably the
// ball count -- confirm against the constructor) and runs one million
// processX() steps followed by one million processY() steps.
int main()
{
    BallsManager bm(15000);
    //-- need to speedup from here --
    const int itSize = 1000000;
    // The step loops must run sequentially: every call reads the positions and
    // directions written by the previous call, so running calls concurrently on
    // the same object is a data race and yields nondeterministic results.
    // The per-element loops inside processX()/processY() are already
    // parallelised with OpenMP, so wrapping these loops in another
    // "parallel for" would also just nest parallel regions.
    for (int iterations = 0; iterations < itSize; ++iterations) {
        bm.processX();
    }
    for (int iterations = 0; iterations < itSize; ++iterations) {
        bm.processY();
    }
    //-- to here --
    return 0;
}