added code for 61x67 matrix
parent
383a851b1a
commit
8795174f3a
63
src/trans.c
63
src/trans.c
|
@ -27,15 +27,31 @@ int is_transpose(int M, int N, int A[N][M], int B[M][N]);
|
||||||
char transpose_submit_desc[] = "Transpose submission";
|
char transpose_submit_desc[] = "Transpose submission";
|
||||||
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
|
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
|
||||||
{
|
{
|
||||||
//343 misses (goal: <300; 0 points if >600)
|
|
||||||
|
int blocksize=0;
|
||||||
|
switch (N)
|
||||||
|
{
|
||||||
|
case 32:
|
||||||
|
blocksize = 8;
|
||||||
|
break;
|
||||||
|
case 64:
|
||||||
|
blocksize = 4;
|
||||||
|
break;
|
||||||
|
case 67:
|
||||||
|
blocksize = 18;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if(N == 32){
|
if(N == 32){
|
||||||
int n, m, nn, mm, diag_pos, diag_value;
|
int n, m, nn, mm, diag_pos, diag_value;
|
||||||
|
|
||||||
for (n=0; n<N;n+=8){
|
for (n=0; n<N;n+=blocksize){
|
||||||
for (m=0;m<M; m+=8){
|
for (m=0;m<M; m+=blocksize){
|
||||||
|
|
||||||
for(nn=n; nn<n+8;nn++){
|
for(nn=n; nn<n+blocksize;nn++){
|
||||||
for(mm=m; mm<m+8;mm++){
|
for(mm=m; mm<m+blocksize;mm++){
|
||||||
|
|
||||||
if(nn!=mm){
|
if(nn!=mm){
|
||||||
B[mm][nn] = A[nn][mm];
|
B[mm][nn] = A[nn][mm];
|
||||||
|
@ -55,11 +71,34 @@ void transpose_submit(int M, int N, int A[N][M], int B[M][N])
|
||||||
if(N == 64){
|
if(N == 64){
|
||||||
int n, m, nn, mm, diag_pos, diag_value;
|
int n, m, nn, mm, diag_pos, diag_value;
|
||||||
|
|
||||||
for (n=0; n<N;n+=4){
|
for (n=0; n<N;n+=blocksize){
|
||||||
for (m=0;m<M; m+=4){
|
for (m=0;m<M; m+=blocksize){
|
||||||
|
|
||||||
for(nn=n; nn<n+4;nn++){
|
for(nn=n; nn<n+blocksize;nn++){
|
||||||
for(mm=m; mm<m+4;mm++){
|
for(mm=m; mm<m+blocksize;mm++){
|
||||||
|
|
||||||
|
if(nn!=mm){
|
||||||
|
B[mm][nn] = A[nn][mm];
|
||||||
|
}else{
|
||||||
|
diag_pos = nn;
|
||||||
|
diag_value = A[nn][mm];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(n==m){
|
||||||
|
B[diag_pos][diag_pos]= diag_value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(N == 67){
|
||||||
|
int n, m, nn, mm, diag_pos, diag_value;
|
||||||
|
|
||||||
|
for (n=0; n<N;n+=blocksize){
|
||||||
|
for (m=0;m<M; m+=blocksize){
|
||||||
|
|
||||||
|
for(nn=n; (nn<n+blocksize)&&(nn<N);nn++){
|
||||||
|
for(mm=m; (mm<m+blocksize)&&(mm<M);mm++){
|
||||||
|
|
||||||
if(nn!=mm){
|
if(nn!=mm){
|
||||||
B[mm][nn] = A[nn][mm];
|
B[mm][nn] = A[nn][mm];
|
||||||
|
@ -85,7 +124,7 @@ void transpose_submit(int M, int N, int A[N][M], int B[M][N])
|
||||||
/*
|
/*
|
||||||
* trans - A simple baseline transpose function, not optimized for the cache.
|
* trans - A simple baseline transpose function, not optimized for the cache.
|
||||||
*/
|
*/
|
||||||
char trans_desc[] = "Simple row-wise scan transpose";
|
/*char trans_desc[] = "Simple row-wise scan transpose";
|
||||||
void trans(int M, int N, int A[N][M], int B[M][N])
|
void trans(int M, int N, int A[N][M], int B[M][N])
|
||||||
{
|
{
|
||||||
int i, j, tmp;
|
int i, j, tmp;
|
||||||
|
@ -97,7 +136,7 @@ void trans(int M, int N, int A[N][M], int B[M][N])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* registerFunctions - This function registers your transpose
|
* registerFunctions - This function registers your transpose
|
||||||
|
@ -112,7 +151,7 @@ void registerFunctions()
|
||||||
registerTransFunction(transpose_submit, transpose_submit_desc);
|
registerTransFunction(transpose_submit, transpose_submit_desc);
|
||||||
|
|
||||||
/* Register any additional transpose functions */
|
/* Register any additional transpose functions */
|
||||||
registerTransFunction(trans, trans_desc);
|
//registerTransFunction(trans, trans_desc);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue