Skip to content

Commit

Permalink
merge issue-628 manually
Browse files Browse the repository at this point in the history
Addresses #644.
  • Loading branch information
PeiMu committed Mar 5, 2023
1 parent 37f066b commit 50853c4
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 18 deletions.
46 changes: 46 additions & 0 deletions analysis/statistics/77e9b64be9282e1325602ee373eb4eb470a4495f.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@

changeset: 1435:77e9b64be9282e1325602ee373eb4eb470a4495f
char kNewtonVersion[] = "0.3-alpha-1435 (77e9b64be9282e1325602ee373eb4eb470a4495f) (build 03-02-2023-19:[email protected]_64)";
\n./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s
\n./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt

Informational Report:
---------------------
Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)...

Kernel 0 is a valid kernel:

1 1
-0.5 -0
1 0
0.5 0
0 -1
-0 -1


The ordering of parameters is: P1 P0 P3 P2 P4 P5

Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0)

Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1)


Kernel 1 is a valid kernel:

1 0
-0.5 1
1 -2
0.5 -1
-0 -2
0 -2


The ordering of parameters is: P1 P0 P3 P2 P4 P5

Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0)

Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2)




2 changes: 1 addition & 1 deletion applications/newton/llvm-ir/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ endif

all: default

default: application.ll simple_control_flow.ll inferBound.ll inferBoundControlFlow.ll e_exp.ll sincosf.ll e_log.ll e_acosh.ll e_j0.ll e_y0.ll e_rem_pio2.ll benchmark_suite.ll phi_two_global_arrays.ll func_call.ll test_shift.ll
default: application.ll simple_control_flow.ll inferBound.ll inferBoundControlFlow.ll e_exp.ll sincosf.ll e_log.ll e_acosh.ll e_j0.ll e_y0.ll e_rem_pio2.ll benchmark_suite.ll phi_two_global_arrays.ll func_call.ll test_shift.ll vec_add.ll vec_add_8.ll

%.ll : %.c
@echo Compiling $*.c
Expand Down
80 changes: 80 additions & 0 deletions applications/newton/llvm-ir/c-files/vec_add.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* compile with 'clang --target=aarch64-arm-none-eabi -O1 vec_add.c -o vec_add -fvectorize'
* */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>

typedef struct timespec timespec;

/*
 *	Return the interval end - start as a timespec.
 *
 *	When the nanosecond field of `end` is smaller than that of `start`,
 *	one second is borrowed so that the nanosecond field of the result
 *	does not go negative.
 */
timespec diff(timespec start, timespec end)
{
	timespec	delta;
	const int	borrow = (end.tv_nsec - start.tv_nsec) < 0;

	delta.tv_sec = end.tv_sec - start.tv_sec - borrow;
	delta.tv_nsec = borrow
				? 1000000000 + end.tv_nsec - start.tv_nsec
				: end.tv_nsec - start.tv_nsec;

	return delta;
}

/*
 *	Return t1 + t2 as a timespec.
 *
 *	If the two nanosecond fields add up to one second or more, the
 *	excess is carried into the seconds field.
 */
timespec sum(timespec t1, timespec t2) {
	timespec	total;
	const int	carry = (t1.tv_nsec + t2.tv_nsec >= 1000000000);

	total.tv_sec = t1.tv_sec + t2.tv_sec + carry;
	total.tv_nsec = carry
				? t1.tv_nsec + t2.tv_nsec - 1000000000
				: t1.tv_nsec + t2.tv_nsec;

	return total;
}

/*
 *	Print `t` to stdout as "<prefix>: <seconds>.<nanoseconds>", with the
 *	nanosecond field zero-padded to nine digits. Both fields are
 *	narrowed to int before printing.
 */
void printTimeSpec(timespec t, const char* prefix) {
	const int	wholeSeconds = (int)t.tv_sec;
	const int	nanoseconds = (int)t.tv_nsec;

	printf("%s: %d.%09d\n", prefix, wholeSeconds, nanoseconds);
}

/*
 *	Read CLOCK_REALTIME and return the reading, to be used as the
 *	starting point of a timed interval.
 */
timespec tic( )
{
	timespec	now;

	clock_gettime(CLOCK_REALTIME, &now);

	return now;
}

/*
 *	Print the time elapsed since *start_time, labelled with `prefix`,
 *	then overwrite *start_time with the current CLOCK_REALTIME reading
 *	so that the next call measures from this point.
 */
void toc( timespec* start_time, const char* prefix )
{
	timespec	now;

	clock_gettime(CLOCK_REALTIME, &now);

	const timespec	elapsed = diff(*start_time, now);
	printTimeSpec(elapsed, prefix);

	*start_time = now;
}

typedef int32_t bmx055fAcceleration;

#define NUM 102400

/*
 *	Element-wise addition: vec_C[i] = vec_A[i] + vec_B[i] for every
 *	i in [0, len_vec). Nothing is written when len_vec <= 0.
 */
void vec_add(bmx055fAcceleration *vec_A, bmx055fAcceleration *vec_B, bmx055fAcceleration *vec_C, int len_vec) {
	for (int idx = 0; idx < len_vec; ++idx) {
		const bmx055fAcceleration	lhs = vec_A[idx];
		const bmx055fAcceleration	rhs = vec_B[idx];

		vec_C[idx] = lhs + rhs;
	}
}

/*
 *	Benchmark driver: fill x and y with rand() % INT8_MAX, time a single
 *	vec_add() call over NUM elements, then print every element of z.
 */
int main() {
	int32_t x[NUM], y[NUM], z[NUM];

	for (size_t idx = 0; idx < NUM; idx++) {
		x[idx] = rand() % INT8_MAX;
		y[idx] = rand() % INT8_MAX;
	}

	/*
	 *	Only the vec_add() call sits between tic() and toc(), so the
	 *	reported delay excludes initialisation and printing.
	 */
	timespec timer = tic();
	vec_add(x, y, z, NUM);
	toc(&timer, "computation delay");

	for (size_t idx = 0; idx < NUM; idx++) {
		/*
		 *	idx is a size_t, so it must be printed with %zu; passing it
		 *	to %d is a format/argument mismatch.
		 */
		printf("value of z[%zu]=%d, ", idx, z[idx]);
	}

	return 0;
}
78 changes: 78 additions & 0 deletions applications/newton/llvm-ir/c-files/vec_add_8.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* compile with 'clang --target=aarch64-arm-none-eabi -O1 vec_add_8.c -o vec_add_8 -fvectorize'
* */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>

typedef struct timespec timespec;

/*
 *	Return the interval end - start as a timespec.
 *
 *	When the nanosecond field of `end` is smaller than that of `start`,
 *	one second is borrowed so that the nanosecond field of the result
 *	does not go negative.
 */
timespec diff(timespec start, timespec end)
{
	timespec	delta;
	const int	borrow = (end.tv_nsec - start.tv_nsec) < 0;

	delta.tv_sec = end.tv_sec - start.tv_sec - borrow;
	delta.tv_nsec = borrow
				? 1000000000 + end.tv_nsec - start.tv_nsec
				: end.tv_nsec - start.tv_nsec;

	return delta;
}

/*
 *	Return t1 + t2 as a timespec.
 *
 *	If the two nanosecond fields add up to one second or more, the
 *	excess is carried into the seconds field.
 */
timespec sum(timespec t1, timespec t2) {
	timespec	total;
	const int	carry = (t1.tv_nsec + t2.tv_nsec >= 1000000000);

	total.tv_sec = t1.tv_sec + t2.tv_sec + carry;
	total.tv_nsec = carry
				? t1.tv_nsec + t2.tv_nsec - 1000000000
				: t1.tv_nsec + t2.tv_nsec;

	return total;
}

/*
 *	Print `t` to stdout as "<prefix>: <seconds>.<nanoseconds>", with the
 *	nanosecond field zero-padded to nine digits. Both fields are
 *	narrowed to int before printing.
 */
void printTimeSpec(timespec t, const char* prefix) {
	const int	wholeSeconds = (int)t.tv_sec;
	const int	nanoseconds = (int)t.tv_nsec;

	printf("%s: %d.%09d\n", prefix, wholeSeconds, nanoseconds);
}

/*
 *	Read CLOCK_REALTIME and return the reading, to be used as the
 *	starting point of a timed interval.
 */
timespec tic( )
{
	timespec	now;

	clock_gettime(CLOCK_REALTIME, &now);

	return now;
}

/*
 *	Print the time elapsed since *start_time, labelled with `prefix`,
 *	then overwrite *start_time with the current CLOCK_REALTIME reading
 *	so that the next call measures from this point.
 */
void toc( timespec* start_time, const char* prefix )
{
	timespec	now;

	clock_gettime(CLOCK_REALTIME, &now);

	const timespec	elapsed = diff(*start_time, now);
	printTimeSpec(elapsed, prefix);

	*start_time = now;
}

#define NUM 102400

/*
 *	Element-wise addition of 8-bit vectors: vec_C[i] = vec_A[i] + vec_B[i]
 *	for every i in [0, len_vec). Each sum is stored back into an int8_t
 *	element. Nothing is written when len_vec <= 0.
 */
void vec_add(int8_t *vec_A, int8_t *vec_B, int8_t *vec_C, int len_vec) {
	for (int idx = 0; idx < len_vec; ++idx) {
		const int8_t	lhs = vec_A[idx];
		const int8_t	rhs = vec_B[idx];

		vec_C[idx] = lhs + rhs;
	}
}

/*
 *	Benchmark driver: fill x and y with rand() % INT8_MAX, time a single
 *	vec_add() call over NUM elements, then print every element of z.
 */
int main() {
	int8_t x[NUM], y[NUM], z[NUM];

	for (size_t idx = 0; idx < NUM; idx++) {
		x[idx] = rand() % INT8_MAX;
		y[idx] = rand() % INT8_MAX;
	}

	/*
	 *	Only the vec_add() call sits between tic() and toc(), so the
	 *	reported delay excludes initialisation and printing.
	 */
	timespec timer = tic();
	vec_add(x, y, z, NUM);
	toc(&timer, "computation delay");

	for (size_t idx = 0; idx < NUM; idx++) {
		/*
		 *	idx is a size_t, so it must be printed with %zu; passing it
		 *	to %d is a format/argument mismatch. z[idx] is promoted to
		 *	int by the variadic call, so %d is correct for it.
		 */
		printf("value of z[%zu]=%d, ", idx, z[idx]);
	}

	return 0;
}
69 changes: 69 additions & 0 deletions applications/newton/llvm-ir/c-files/vectorize_experiment.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Experiment Results of vectorization

## Compile only with `Clang`
### x86-64
```bash
clang -O1 vec_add.c -o vec_add # 0.000209616 s
clang -O1 vec_add.c -o vec_add -fvectorize # 0.000157489 s
clang -O1 vec_add_8.c -o vec_add_8 # 0.000111221 s
clang -O1 vec_add_8.c -o vec_add_8 -fvectorize # 0.000048906 s
```

### arm64
```bash
clang --target=aarch64-arm-none-eabi -O1 vec_add.c -o vec_add # 0.001143304 s
clang --target=aarch64-arm-none-eabi -O1 vec_add.c -o vec_add -fvectorize # 0.000856311 s
clang --target=aarch64-arm-none-eabi -O1 vec_add_8.c -o vec_add_8 # 0.000776979 s
clang --target=aarch64-arm-none-eabi -O1 vec_add_8.c -o vec_add_8 -fvectorize # 0.000201536 s
```

## Compile with `Clang` and `opt`
### x86-64
```bash
clang -O0 -g -Xclang -disable-O0-optnone vec_add.c -S -emit-llvm -o vec_add.ll
opt vec_add.ll --O1 -S -o vec_add_none_opt.ll
clang vec_add_none_opt.ll -o vec_add_none_opt
./vec_add_none_opt # 0.000328377 s
opt vec_add.ll --O1 --loop-vectorize -S -o vec_add_opt.ll
clang vec_add_opt.ll -o vec_add_opt
./vec_add_opt # 0.000319101 s
clang -O0 -g -Xclang -disable-O0-optnone vec_add_8.c -S -emit-llvm -o vec_add_8.ll
opt vec_add_8.ll --O1 -S -o vec_add_8_none_opt.ll
clang vec_add_8_none_opt.ll -o vec_add_8_none_opt
./vec_add_8_none_opt # 0.000207441 s
opt vec_add_8.ll --O1 --loop-vectorize -S -o vec_add_8_opt.ll
clang vec_add_8_opt.ll -o vec_add_8_opt
./vec_add_8_opt # 0.000206795 s
```

### arm64
```bash
clang --target=aarch64-arm-none-eabi -O0 -g -Xclang -disable-O0-optnone vec_add.c -S -emit-llvm -o vec_add.ll
opt vec_add.ll --O1 -S -o vec_add_none_opt.ll
clang --target=aarch64-arm-none-eabi vec_add_none_opt.ll -o vec_add_none_opt
./vec_add_none_opt # 0.002345815 s
opt vec_add.ll --O1 --loop-vectorize -S -o vec_add_opt.ll
clang --target=aarch64-arm-none-eabi vec_add_opt.ll -o vec_add_opt
./vec_add_opt # 0.000947018 s
clang --target=aarch64-arm-none-eabi -O0 -g -Xclang -disable-O0-optnone vec_add_8.c -S -emit-llvm -o vec_add_8.ll
opt vec_add_8.ll --O1 -S -o vec_add_8_none_opt.ll
clang --target=aarch64-arm-none-eabi vec_add_8_none_opt.ll -o vec_add_8_none_opt
./vec_add_8_none_opt # 0.002099071 s
opt vec_add_8.ll --O1 --loop-vectorize -S -o vec_add_8_opt.ll
clang --target=aarch64-arm-none-eabi vec_add_8_opt.ll -o vec_add_8_opt
./vec_add_8_opt # 0.000227494 s
```

## Run with Newton Compiler
```bash
cd ../../../../src/newton
./newton-linux-EN --llvm-ir=../../applications/newton/llvm-ir/vec_add.ll --llvm-ir-liveness-check ../../applications/newton/sensors/test.nt
cd -
llvm-dis ../vec_add_output.bc
opt ../vec_add.ll --O1 --loop-vectorize -S -o vec_add_opt.ll
clang vec_add_opt.ll -o vec_add_opt
./vec_add_opt # 0.000318110 s
opt ../vec_add_output.ll --O1 --loop-vectorize -S -o vec_add_output_opt.ll
clang vec_add_output_opt.ll -o vec_add_output_opt
./vec_add_output_opt # 0.000205080 s
```
60 changes: 43 additions & 17 deletions src/newton/newton-irPass-LLVMIR-shrinkTypeByRange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,13 @@ varType
getFloatingTypeEnum(double min, double max)
{
varType finalType;
if ((std::abs(min) < FLT_MAX) && (std::abs(max) < FLT_MAX))
if ((FLT_EPSILON < std::abs(min) && std::abs(min) < FLT_MAX) &&
(FLT_EPSILON < std::abs(max) && std::abs(max) < FLT_MAX))
{
finalType = FLOAT;
}
else if ((std::abs(min) < DBL_MAX) && (std::abs(max) < DBL_MAX))
else if ((DBL_EPSILON < std::abs(min) && std::abs(min) < DBL_MAX) &&
(DBL_EPSILON < std::abs(max) && std::abs(max) < DBL_MAX))
{
finalType = DOUBLE;
}
Expand Down Expand Up @@ -925,6 +927,11 @@ matchDestType(State * N, Instruction * inInstruction, BasicBlock & llvmIrBasicBl
typeInfo backType;
backType.signFlag = isSignedValue(inInstruction);
backType.valueType = inInstType;
if (isa<LoadInst>(inInstruction))
{
unsigned ptAddressSpace = srcType->getPointerAddressSpace();
backType.valueType = backType.valueType->getPointerTo(ptAddressSpace);
}
for (size_t id = 0; id < inInstruction->getNumOperands(); id++)
{
auto newTypeValue = rollbackType(N, inInstruction, id, llvmIrBasicBlock, typeChangedInst, backType);
Expand Down Expand Up @@ -974,7 +981,13 @@ matchDestType(State * N, Instruction * inInstruction, BasicBlock & llvmIrBasicBl
/*
* roll back operands to typeInformation.valueType
* */
for (size_t id = 0; id < inInstruction->getNumOperands(); id++)
if (isa<LoadInst>(inInstruction))
{
unsigned ptAddressSpace = srcType->getPointerAddressSpace();
typeInformation.valueType = typeInformation.valueType->getPointerTo(ptAddressSpace);
}
size_t roll_backed_op_num = isa<GetElementPtrInst>(inInstruction) ? 1 : inInstruction->getNumOperands();
for (size_t id = 0; id < roll_backed_op_num; id++)
{
typeInfo operandPrevTypeInfo{typeInformation.valueType,
isSignedValue(inInstruction->getOperand(id))};
Expand Down Expand Up @@ -1496,6 +1509,10 @@ mergeCast(State * N, Function & llvmIrFunction,
Instruction * llvmIrInstruction = &*itBB++;
switch (llvmIrInstruction->getOpcode())
{
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPExt:
Expand Down Expand Up @@ -1540,7 +1557,23 @@ mergeCast(State * N, Function & llvmIrFunction,
* */
Value * castInst;
auto valueType = llvmIrInstruction->getType();
if (valueType->isIntegerTy())
if ((valueType->isFloatTy() || valueType->isDoubleTy()) &&
sourceOperand->getType()->isIntegerTy())
{
// float fa = (float)ia;
bool isSigned = sourceInst->getOpcode() == Instruction::SIToFP;
castInst = isSigned ? Builder.CreateSIToFP(sourceOperand, valueType)
: Builder.CreateUIToFP(sourceOperand, valueType);
}
else if (valueType->isIntegerTy() &&
(sourceOperand->getType()->isFloatTy() || sourceOperand->getType()->isDoubleTy()))
{
// int iq = (int)fq;
bool isSigned = sourceInst->getOpcode() == Instruction::FPToSI;
castInst = isSigned ? Builder.CreateFPToSI(sourceOperand, valueType)
: Builder.CreateFPToUI(sourceOperand, valueType);
}
else if (valueType->isIntegerTy())
{
castInst = Builder.CreateIntCast(sourceOperand, valueType,
llvmIrInstruction->getOpcode() == Instruction::SExt);
Expand Down Expand Up @@ -1648,6 +1681,10 @@ countCastInst(State * N, Function & llvmIrFunction)
{
switch (llvmIrInstruction.getOpcode())
{
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPExt:
Expand Down Expand Up @@ -1827,19 +1864,8 @@ shrinkType(State * N, BoundInfo * boundInfo, Function & llvmIrFunction)
* 1. construct instruction dependency link
* 2. work with roll back strategies
* */
std::vector<std::vector<Value *>> prevDepLink = getDependencyLink(N, llvmIrFunction);
std::map<Value *, typeInfo> typeChangedInst = shrinkInstType(N, boundInfo, llvmIrFunction);
mergeCast(N, llvmIrFunction, boundInfo->virtualRegisterRange, typeChangedInst);
std::vector<std::vector<Value *>> newDepLink = getDependencyLink(N, llvmIrFunction);

for (auto & depLink : newDepLink)
{
if (rollBackStrategy(N, depLink))
{
rollBackDependencyLink(N, depLink, boundInfo->virtualRegisterRange, typeChangedInst);
}
}
std::map<Value *, typeInfo> typeChangedInst = shrinkInstType(N, boundInfo, llvmIrFunction);

mergeCast(N, llvmIrFunction, boundInfo->virtualRegisterRange, typeChangedInst);
}
}
}

0 comments on commit 50853c4

Please sign in to comment.