diff --git a/.github/workflows/ci_test.yml b/.github/workflows/ci_test.yml
index 6b6ac9010..12cb4f6b6 100644
--- a/.github/workflows/ci_test.yml
+++ b/.github/workflows/ci_test.yml
@@ -10,7 +10,7 @@ on:
       - master
       - r*
 env:
-  USE_BAZEL_VERSION: "3.7.2"
+  USE_BAZEL_VERSION: "5.1.1"
 
 jobs:
   yapf-test:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 10747c0ab..e796ceddb 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -17,7 +17,7 @@ on:
 env:
   MIN_PY_VERSION: '3.7'
   MAX_PY_VERSION: '3.9'
-  USE_BAZEL_VERSION: "3.7.2"
+  USE_BAZEL_VERSION: "5.1.1"
   HOROVOD_VERSION: '0.23.0'
 
 jobs:
@@ -168,11 +168,15 @@ jobs:
     name: Upload dev container to DockerHub
     needs: [release-wheel, test-with-bazel]
     runs-on: ubuntu-18.04
+    strategy:
+      matrix:
+        py-version: ['3.7', '3.8', '3.9']
     if: (github.event_name == 'push' && github.ref == 'refs/heads/master')
     steps:
       - uses: actions/checkout@v2
       - run: |
           set -e -x
           echo ${{ secrets.DOCKER_PW }} | docker login --username ${{ secrets.DOCKER_USER }} --password-stdin
+          export PY_VERSION=${{ matrix.py-version }}
           bash .github/workflows/github_build_dev_container.sh
-          docker push tfra/dev_container:latest-cpu
\ No newline at end of file
+          docker push tfra/dev_container:latest-python${{ matrix.py-version }}
\ No newline at end of file
diff --git a/README.md b/README.md
index 0aa65ebbe..ac00fa444 100644
--- a/README.md
+++ b/README.md
@@ -134,8 +134,15 @@ but maybe this doc can help you : [Extract headers from TensorFlow compiling dir
 At the same time, we find some OPs used by TRFA have better performance, so we highly recommend you update TensorFlow to 2.x.
 
 #### Installing from Source
+
+For all developers, we recommend you use the development docker containers which are all GPU enabled:
+```shell
+docker pull tfra/dev_container:latest-python3.8  # "3.7", "3.9" are all avaliable.
+docker run --privileged --gpus all -it --rm -v $(pwd):$(pwd) tfra/dev_container:latest-3.8
+```
+
 ##### CPU Only
-You can also install from source. This requires the [Bazel](https://bazel.build/) build system (version == 3.7.2).
+You can also install from source. This requires the [Bazel](https://bazel.build/) build system (version == 5.1.1).
 Please install a TensorFlow on your compiling machine, The compiler needs to know the version of Tensorflow and 
 its headers according to the installed TensorFlow. 
 
@@ -158,18 +165,20 @@ pip install artifacts/tensorflow_recommenders_addons-*.whl
 ##### GPU Support
 Only `TF_NEED_CUDA=1` is required and other environment variables are optional:
 ```shell
-PY_VERSION="3.7" \
-TF_NEED_CUDA=1 \
-TF_CUDA_VERSION=11.2 \
-TF_CUDNN_VERSION=8.1 \
-CUDA_TOOLKIT_PATH="/usr/local/cuda" \
-CUDNN_INSTALL_PATH="/usr/lib/x86_64-linux-gnu" \
+export TF_VERSION="2.5.1"  # "2.7.0", "2.5.1" are well tested.
+export PY_VERSION="3.8" 
+export TF_NEED_CUDA=1
+export TF_CUDA_VERSION=11.2
+export TF_CUDNN_VERSION=8.1
+export CUDA_TOOLKIT_PATH="/usr/local/cuda"
+export CUDNN_INSTALL_PATH="/usr/lib/x86_64-linux-gnu"
+
 python configure.py
 ```
 And then build the pip package and install:
 ```shell
 bazel build --enable_runfiles build_pip_pkg
-TF_NEED_CUDA=1 bazel-bin/build_pip_pkg artifacts
+bazel-bin/build_pip_pkg artifacts
 pip install artifacts/tensorflow_recommenders_addons_gpu-*.whl
 ```
 ##### Apple Silicon Support (Beta Release)
diff --git a/configure.py b/configure.py
index 553a93fdd..c9bdde59c 100644
--- a/configure.py
+++ b/configure.py
@@ -40,10 +40,10 @@ def _VALID_BAZEL_VERSION(tf_version):
         'refering to the previous COMMIT to compile properly by themselves.')
     return target_bazel
   elif tf_version >= "2.0.0":
-    target_bazel = "3.7.2"
+    target_bazel = "5.1.1"
     logging.info(
         'To ensure code compatibility with Bazel rules_foreign_cc component, '
-        'we specify Bazel version greater than 3.7.2 '
+        'we specify Bazel version greater than 5.1.1 '
         'for Tensorflow versions greater than 2.0.0.')
     return target_bazel
   else:
diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/cuckoo_hashtable_op_gpu.cu.cc b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/cuckoo_hashtable_op_gpu.cu.cc
index d371ee859..253a23296 100644
--- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/cuckoo_hashtable_op_gpu.cu.cc
+++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/cuckoo_hashtable_op_gpu.cu.cc
@@ -115,7 +115,6 @@ class CuckooHashTableOfTensorsGpu final : public LookupInterface {
               const Tensor& default_value) override {
     size_t len = d_keys.flat<K>().size();
     bool* d_status;
-    gpu::ValueArrayBase<V>* d_default_value;
 
     auto value_flat = value->flat_inner_dims<V, 2>();
     const auto default_flat = default_value.flat<V>();
@@ -140,7 +139,6 @@ class CuckooHashTableOfTensorsGpu final : public LookupInterface {
         CUDA_CHECK(cudaStreamSynchronize(_stream));
       }
       CUDA_CHECK(cudaFree(d_status));
-      CUDA_CHECK(cudaFree(d_default_value));
       CUDA_CHECK(cudaStreamDestroy(_stream));
     }
     return Status::OK();
@@ -150,7 +148,6 @@ class CuckooHashTableOfTensorsGpu final : public LookupInterface {
                         Tensor* value, const Tensor& default_value,
                         Tensor* exists) {
     size_t len = d_keys.flat<K>().size();
-    gpu::ValueArrayBase<V>* d_default_value;
 
     auto value_flat = value->flat_inner_dims<V, 2>();
     const auto default_flat = default_value.flat<V>();
@@ -173,7 +170,6 @@ class CuckooHashTableOfTensorsGpu final : public LookupInterface {
                     _stream, is_full_default);
         CUDA_CHECK(cudaStreamSynchronize(_stream));
       }
-      CUDA_CHECK(cudaFree(d_default_value));
       CUDA_CHECK(cudaStreamDestroy(_stream));
     }
     return Status::OK();
diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h
index b14c42b52..f28b138cb 100644
--- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h
+++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h
@@ -116,9 +116,13 @@ class TableWrapperBase {
  public:
   virtual ~TableWrapperBase() {}
   virtual bool insert_or_assign(K key, ConstTensor2D<V>& value_flat,
-                                int64 value_dim, int64 index) {}
+                                int64 value_dim, int64 index) {
+    return false;
+  }
   virtual bool insert_or_accum(K key, ConstTensor2D<V>& value_or_delta_flat,
-                               bool exist, int64 value_dim, int64 index) {}
+                               bool exist, int64 value_dim, int64 index) {
+    return false;
+  }
   virtual void find(const K& key, Tensor2D<V>& value_flat,
                     ConstTensor2D<V>& default_flat, int64 value_dim,
                     bool is_full_size_default, int64 index) const {}
@@ -126,7 +130,7 @@ class TableWrapperBase {
                     ConstTensor2D<V>& default_flat, bool& exist,
                     int64 value_dim, bool is_full_size_default,
                     int64 index) const {}
-  virtual size_t size() const {}
+  virtual size_t size() const { return 0; }
   virtual void clear() {}
   virtual bool erase(const K& key) {}
   virtual Status export_values(OpKernelContext* ctx, int64 value_dim) {}
diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_gpu.h b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_gpu.h
index 4b29a2f07..5415e0bbc 100644
--- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_gpu.h
+++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_gpu.h
@@ -71,8 +71,8 @@ class TableWrapperBase {
   virtual void get(const K* d_keys, ValueType<V>* d_vals, bool* d_status,
                    size_t len, ValueType<V>* d_def_val, cudaStream_t stream,
                    bool is_full_size_default) const {}
-  virtual size_t get_size(cudaStream_t stream) const {}
-  virtual size_t get_capacity() const {}
+  virtual size_t get_size(cudaStream_t stream) const { return 0; }
+  virtual size_t get_capacity() const { return 0; }
   virtual void remove(const K* d_keys, size_t len, cudaStream_t stream) {}
   virtual void clear(cudaStream_t stream) {}
 };
diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/keras/layers/embedding.py b/tensorflow_recommenders_addons/dynamic_embedding/python/keras/layers/embedding.py
index 1868e8c8e..b76eacf14 100644
--- a/tensorflow_recommenders_addons/dynamic_embedding/python/keras/layers/embedding.py
+++ b/tensorflow_recommenders_addons/dynamic_embedding/python/keras/layers/embedding.py
@@ -311,17 +311,23 @@ class FieldWiseEmbedding(BasicEmbedding):
   ```python
   nslots = 3
   @tf.function
-  def map_slot_fn(feature_id):
+  def feature_to_slot(feature_id):
     field_id = tf.math.mod(feature_id, nslots)
     return field_id
 
   ids = tf.constant([[23, 12, 0], [9, 13, 10]], dtype=tf.int64)
-  embedding = de.layers.FieldWiseEmbedding(1, nslots, map_slot_fn)
+  embedding = de.layers.FieldWiseEmbedding(2,
+                                           nslots,
+                                           slot_map_fn=feature_to_slot,
+                                           initializer=tf.keras.initializer.Zeros())
+
+  out = embedding(ids)
+  # [[[0., 0.], [0., 0.], [0., 1.]]
+  #  [[0., 0.], [0., 0.], [0., 1.]]]
 
   prepared_keys = tf.range(0, 100, dtype=tf.int64)
   prepared_values = tf.ones((100, 2), dtype=tf.float32)
   embedding.params.upsert(prepared_keys, prepared_values)
-
   out = embedding(ids)
   # [[2., 2.], [0., 0.], [1., 1.]]
   # [[1., 1.], [2., 2.], [0., 0.]]
diff --git a/tools/build_dev_container.sh b/tools/build_dev_container.sh
index 609a61189..d9096dbe8 100755
--- a/tools/build_dev_container.sh
+++ b/tools/build_dev_container.sh
@@ -1,10 +1,11 @@
 #!/usr/bin/env bash
 
 set -x -e
-
-DOCKER_BUILDKIT=1 docker build \
+docker build \
     -f tools/docker/dev_container.Dockerfile \
     --build-arg TF_VERSION=2.5.1 \
-    --build-arg TF_PACKAGE=tensorflow-cpu \
-    --target dev_container_cpu \
-    -t tfaddons/dev_container:latest-cpu ./
+    --build-arg TF_PACKAGE=tensorflow-gpu \
+    --build-arg PY_VERSION=$PY_VERSION \
+    --no-cache \
+    --target dev_container \
+    -t tfra/dev_container:latest-python$PY_VERSION ./
\ No newline at end of file
diff --git a/tools/docker/cpu_tests.Dockerfile b/tools/docker/cpu_tests.Dockerfile
index 3cfa798de..2fce977c1 100644
--- a/tools/docker/cpu_tests.Dockerfile
+++ b/tools/docker/cpu_tests.Dockerfile
@@ -2,7 +2,7 @@
 FROM python:3.7 as build_wheel
 
 ARG TF_VERSION=2.5.1
-ARG USE_BAZEL_VERSION=3.7.2
+ARG USE_BAZEL_VERSION=5.1.1
 ARG MPI_VERSION="4.1.1"
 ARG HOROVOD_VERSION="0.23.0"
 
diff --git a/tools/docker/cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython.Dockerfile b/tools/docker/cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython.Dockerfile
index d1f0df738..3bfbafc57 100644
--- a/tools/docker/cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython.Dockerfile
+++ b/tools/docker/cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython.Dockerfile
@@ -79,7 +79,7 @@ RUN apt-get update && apt-get install -y \
     rm -rf /var/lib/apt/lists/*
 
 COPY install/install_bazel.sh /install/
-RUN /install/install_bazel.sh "3.7.2"
+RUN /install/install_bazel.sh "5.1.1"
 
 COPY install/build_and_install_python.sh /install/
 RUN /install/build_and_install_python.sh "3.5.9"
diff --git a/tools/docker/cuda11.2-cudnn8.1-ubuntu18.04-manylinux2010-multipython.Dockerfile b/tools/docker/cuda11.2-cudnn8.1-ubuntu18.04-manylinux2010-multipython.Dockerfile
index d55260d83..4cf41d20b 100644
--- a/tools/docker/cuda11.2-cudnn8.1-ubuntu18.04-manylinux2010-multipython.Dockerfile
+++ b/tools/docker/cuda11.2-cudnn8.1-ubuntu18.04-manylinux2010-multipython.Dockerfile
@@ -77,9 +77,6 @@ RUN apt-get update && apt-get install -y \
 RUN chmod 777 /tmp/
 WORKDIR /tmp/
 
-COPY install/install_trt.sh /install/
-RUN /install/install_trt.sh "7.2.3-1+cuda11.1"
-
 COPY install/install_nccl.sh /install/
 RUN /install/install_nccl.sh "2.8.4-1+cuda11.2"
 
@@ -87,7 +84,7 @@ COPY install/install_rocksdb.sh /install/
 RUN /install/install_rocksdb.sh "6.22.1"
 
 COPY install/install_bazel.sh /install/
-RUN /install/install_bazel.sh "3.7.2"
+RUN /install/install_bazel.sh "5.1.1"
 
 COPY install/build_and_install_python.sh /install/
 RUN /install/build_and_install_python.sh "3.6.9"
diff --git a/tools/docker/dev_container.Dockerfile b/tools/docker/dev_container.Dockerfile
index 8b4e4738a..0306baa9d 100644
--- a/tools/docker/dev_container.Dockerfile
+++ b/tools/docker/dev_container.Dockerfile
@@ -1,17 +1,19 @@
 #syntax=docker/dockerfile:1.1.5-experimental
-FROM tensorflow/tensorflow:2.1.0-custom-op-ubuntu16 as dev_container_cpu
+ARG PY_VERSION
+ARG IMAGE_TYPE
+
+# Currenly all of our dev images are GPU capable but at a cost of being quite large.
+# See https://github.com/tensorflow/build/pull/47
+FROM tensorflow/build:latest-python$PY_VERSION as dev_container
 ARG TF_PACKAGE
 ARG TF_VERSION
 
-# Temporary until custom-op container is updated
-RUN ln -sf /usr/bin/python3 /usr/bin/python
-RUN ln -sf /usr/local/bin/pip3 /usr/local/bin/pip
+RUN pip uninstall $TF_PACKAGE -y
 RUN pip install --default-timeout=1000 $TF_PACKAGE==$TF_VERSION
 
 COPY tools/install_deps /install_deps
 COPY requirements.txt /tmp/requirements.txt
-RUN pip install -r /install_deps/black.txt \
-    -r /install_deps/flake8.txt \
+RUN pip install -r /install_deps/yapf.txt \
     -r /install_deps/pytest.txt \
     -r /install_deps/typedapi.txt \
     -r /tmp/requirements.txt
@@ -19,9 +21,12 @@ RUN pip install -r /install_deps/black.txt \
 RUN bash /install_deps/buildifier.sh
 RUN bash /install_deps/clang-format.sh
 
-ENV TFRA_DEV_CONTAINER="1"
+COPY tools/docker/install/install_bazel.sh /install/
+RUN /install/install_bazel.sh "5.1.1"
+
+ENV ADDONS_DEV_CONTAINER="1"
 
 # Clean up
 RUN apt-get autoremove -y \
     && apt-get clean -y \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/*
\ No newline at end of file
diff --git a/tools/run_build.sh b/tools/run_build.sh
index 0bf16932f..dbb830298 100644
--- a/tools/run_build.sh
+++ b/tools/run_build.sh
@@ -6,5 +6,5 @@ set -e
 
 DOCKER_BUILDKIT=1 docker build \
     -f tools/docker/sanity_check.Dockerfile \
-    --build-arg USE_BAZEL_VERSION=${USE_BAZEL_VERSION:-"3.7.2"} \
+    --build-arg USE_BAZEL_VERSION=${USE_BAZEL_VERSION:-"5.1.1"} \
     --target=${1} ./
diff --git a/tools/run_sanity_check.sh b/tools/run_sanity_check.sh
index 63acdcd99..e84aa8b60 100644
--- a/tools/run_sanity_check.sh
+++ b/tools/run_sanity_check.sh
@@ -4,5 +4,5 @@ set -e
 
 DOCKER_BUILDKIT=1 docker build \
     -f tools/docker/sanity_check.Dockerfile \
-    --build-arg USE_BAZEL_VERSION=${USE_BAZEL_VERSION:-"3.7.2"} \
+    --build-arg USE_BAZEL_VERSION=${USE_BAZEL_VERSION:-"5.1.1"} \
     ./
\ No newline at end of file