Update README instructions

ldbc · Sep 12, 2022 · 6fab847 · 6fab847
1 parent 5721546
commit 6fab847
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -58,21 +58,27 @@ Spark 3.2.x is the recommended runtime to use. The rest of the instructions are
 To place Spark under `/opt/`:
 
 ```bash
-curl https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz | sudo tar -xz -C /opt/
-export SPARK_HOME="/opt/spark-3.2.2-bin-hadoop3.2"
-export PATH="${SPARK_HOME}/bin":"${PATH}"
+scripts/get-spark-to-opt.sh
 ```
 
-To place under `~/`:
+To place it under `${HOME}/`:
 
 ```bash
-curl https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz | tar -xz -C ~/
-export SPARK_HOME=~/spark-3.2.2-bin-hadoop3.2
-export PATH="${SPARK_HOME}/bin":"${PATH}"
+scripts/get-spark-to-home.sh
 ```
 
 Both Java 8 and Java 11 are supported.
 
+#### Building the project
+
+Run:
+
+```bash
+scripts/build.sh
+```
+
+#### Running the generator
+
 Once you have Spark in place and built the JAR file, run the generator as follows:
 
 ```bash
@@ -90,7 +96,7 @@ The runtime configuration arguments determine the amount of memory, number of th
 ./tools/run.py --help
 ```
 
-To generate a single `part-*.csv` file, reduce the parallelism (number of Spark partitions) to 1.
+To generate a single `part-*` file, reduce the parallelism (number of Spark partitions) to 1.
 
 ```bash
 ./tools/run.py --parallelism 1 -- --format csv --scale-factor 0.003 --mode interactive

diff --git a/scripts/get-spark-to-home.sh b/scripts/get-spark-to-home.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Downloads the Spark 3.2.2 binary distribution and unpacks it under ${HOME}/.
+set -euo pipefail
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+# -f makes curl exit non-zero on HTTP errors and pipefail lets that failure abort the script.
+curl -fL https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz | tar -xz -C "${HOME}/"
+export SPARK_HOME="${HOME}/spark-3.2.2-bin-hadoop3.2"  # NOTE: when executed rather than sourced, these exports do not reach the calling shell
+export PATH="${SPARK_HOME}/bin":"${PATH}"
diff --git a/scripts/get-spark-to-opt.sh b/scripts/get-spark-to-opt.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Downloads the Spark 3.2.2 binary distribution and unpacks it under /opt/ with tar run through sudo.
+set -euo pipefail
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+# -f makes curl exit non-zero on HTTP errors and pipefail lets that failure abort the script.
+curl -fL https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz | sudo tar -xz -C /opt/
+export SPARK_HOME="/opt/spark-3.2.2-bin-hadoop3.2"  # NOTE: when executed rather than sourced, these exports do not reach the calling shell
+export PATH="${SPARK_HOME}/bin":"${PATH}"