diff --git a/doc/building.html b/doc/building.html
index 6433786904f..78e1fd0adb9 100644
--- a/doc/building.html
+++ b/doc/building.html
@@ -853,7 +853,6 @@ <h3 id="cross-compiling-with-debian-sysroots">Cross compiling with Debian sysroo
 </table>
 <h3 id="building-for-armaarch64">Building for ARM/aarch64</h3>
 <p>A common cross-compilation target is the ARM CPU. When building for ARM, it is useful to set the ABI profile. A number of pre-defined ABI profiles are available using <code>--with-abi-profile</code>: arm-vfp-sflt, arm-vfp-hflt, arm-sflt, armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer properly supported by the JDK.</p>
-<p>The JDK contains two different ports for the aarch64 platform, one is the original aarch64 port from the <a href="http://openjdk.java.net/projects/aarch64-port">AArch64 Port Project</a> and one is a 64-bit version of the Oracle contributed ARM port. When targeting aarch64, by the default the original aarch64 port is used. To select the Oracle ARM 64 port, use <code>--with-cpu-port=arm64</code>. Also set the corresponding value (<code>aarch64</code> or <code>arm64</code>) to --with-abi-profile, to ensure a consistent build.</p>
 <h3 id="building-for-musl">Building for musl</h3>
 <p>Just like it's possible to cross-compile for a different CPU, it's possible to cross-compile for musl libc on a glibc-based <em>build</em> system. A devkit suitable for most target CPU architectures can be obtained from <a href="https://musl.cc">musl.cc</a>. After installing the required packages in the sysroot, configure the build with <code>--openjdk-target</code>:</p>
 <pre><code>sh ./configure --with-jvm-variants=server \
diff --git a/doc/building.md b/doc/building.md
index d7eb913995f..e900c4735ca 100644
--- a/doc/building.md
+++ b/doc/building.md
@@ -1228,14 +1228,6 @@ available using `--with-abi-profile`: arm-vfp-sflt, arm-vfp-hflt, arm-sflt,
 armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer
 properly supported by the JDK.
 
-The JDK contains two different ports for the aarch64 platform, one is the
-original aarch64 port from the [AArch64 Port Project](
-http://openjdk.java.net/projects/aarch64-port) and one is a 64-bit version of
-the Oracle contributed ARM port. When targeting aarch64, by the default the
-original aarch64 port is used. To select the Oracle ARM 64 port, use
-`--with-cpu-port=arm64`. Also set the corresponding value (`aarch64` or
-`arm64`) to --with-abi-profile, to ensure a consistent build.
-
 ### Building for musl
 
 Just like it's possible to cross-compile for a different CPU, it's possible to
diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4
index cb86f3acf09..94f88ba9a39 100644
--- a/make/autoconf/flags-cflags.m4
+++ b/make/autoconf/flags-cflags.m4
@@ -795,10 +795,6 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP],
       # -Wno-psabi to get rid of annoying "note: the mangling of 'va_list' has changed in GCC 4.4"
       $1_CFLAGS_CPU="-fsigned-char -Wno-psabi $ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS -DJDK_ARCH_ABI_PROP_NAME='\"\$(JDK_ARCH_ABI_PROP_NAME)\"'"
       $1_CFLAGS_CPU_JVM="-DARM"
-    elif test "x$FLAGS_CPU" = xaarch64; then
-      if test "x$HOTSPOT_TARGET_CPU_PORT" = xarm64; then
-        $1_CFLAGS_CPU_JVM="-fsigned-char -DARM"
-      fi
     elif test "x$FLAGS_CPU_ARCH" = xppc; then
       $1_CFLAGS_CPU_JVM="-minsert-sched-nops=regroup_exact -mno-multiple -mno-string"
       if test "x$FLAGS_CPU" = xppc64; then
diff --git a/make/autoconf/flags-ldflags.m4 b/make/autoconf/flags-ldflags.m4
index 9db09c38380..1e9320224d6 100644
--- a/make/autoconf/flags-ldflags.m4
+++ b/make/autoconf/flags-ldflags.m4
@@ -190,10 +190,6 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_CPU_DEP],
     elif test "x$OPENJDK_$1_CPU" = xarm; then
       $1_CPU_LDFLAGS_JVM_ONLY="${$1_CPU_LDFLAGS_JVM_ONLY} -fsigned-char"
       $1_CPU_LDFLAGS="$ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS"
-    elif test "x$FLAGS_CPU" = xaarch64; then
-      if test "x$HOTSPOT_TARGET_CPU_PORT" = xarm64; then
-        $1_CPU_LDFLAGS_JVM_ONLY="${$1_CPU_LDFLAGS_JVM_ONLY} -fsigned-char"
-      fi
     fi
 
   elif test "x$TOOLCHAIN_TYPE" = xsolstudio; then
diff --git a/make/autoconf/flags.m4 b/make/autoconf/flags.m4
index 6e7e9ae6d2b..06311cfd265 100644
--- a/make/autoconf/flags.m4
+++ b/make/autoconf/flags.m4
@@ -34,7 +34,7 @@ m4_include([flags-other.m4])
 AC_DEFUN([FLAGS_SETUP_ABI_PROFILE],
 [
   AC_ARG_WITH(abi-profile, [AS_HELP_STRING([--with-abi-profile],
-      [specify ABI profile for ARM builds (arm-vfp-sflt,arm-vfp-hflt,arm-sflt, armv5-vfp-sflt,armv6-vfp-hflt,arm64,aarch64) @<:@toolchain dependent@:>@ ])])
+      [specify ABI profile for ARM builds (arm-vfp-sflt,arm-vfp-hflt,arm-sflt, armv5-vfp-sflt,armv6-vfp-hflt,aarch64) @<:@toolchain dependent@:>@ ])])
 
   if test "x$with_abi_profile" != x; then
     if test "x$OPENJDK_TARGET_CPU" != xarm && \
@@ -61,10 +61,6 @@ AC_DEFUN([FLAGS_SETUP_ABI_PROFILE],
     elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarmv6-vfp-hflt; then
       ARM_FLOAT_TYPE=vfp-hflt
       ARM_ARCH_TYPE_FLAGS='-march=armv6 -marm'
-    elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm64; then
-      # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME
-      ARM_FLOAT_TYPE=
-      ARM_ARCH_TYPE_FLAGS=
     elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xaarch64; then
       # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME
       ARM_FLOAT_TYPE=
diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4
index 2c52fd98c6f..364a376a9b2 100644
--- a/make/autoconf/hotspot.m4
+++ b/make/autoconf/hotspot.m4
@@ -86,8 +86,6 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_VARIANTS],
   AC_ARG_WITH([jvm-variants], [AS_HELP_STRING([--with-jvm-variants],
       [JVM variants (separated by commas) to build (server,client,minimal,core,zero,custom) @<:@server@:>@])])
 
-  SETUP_HOTSPOT_TARGET_CPU_PORT
-
   if test "x$with_jvm_variants" = x; then
     with_jvm_variants="server"
   fi
@@ -335,9 +333,6 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
   if test "x$OPENJDK_TARGET_CPU" = xarm; then
     HOTSPOT_TARGET_CPU=arm_32
     HOTSPOT_TARGET_CPU_DEFINE="ARM32"
-  elif test "x$OPENJDK_TARGET_CPU" = xaarch64 && test "x$HOTSPOT_TARGET_CPU_PORT" = xarm64; then
-    HOTSPOT_TARGET_CPU=arm_64
-    HOTSPOT_TARGET_CPU_ARCH=arm
   fi
 
   # Verify that dependencies are met for explicitly set features.
@@ -562,6 +557,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
 
   # We don't support --with-jvm-interpreter anymore, use zero instead.
   UTIL_DEPRECATED_ARG_WITH(jvm-interpreter)
+  # --with-cpu-port is no longer supported (the macro takes the option name without the "with-" prefix).
+  UTIL_DEPRECATED_ARG_WITH(cpu-port)
 ])
 
 ###############################################################################
@@ -598,31 +595,6 @@ AC_DEFUN_ONCE([HOTSPOT_FINALIZE_JVM_FEATURES],
   done
 ])
 
-################################################################################
-#
-# Specify which sources will be used to build the 64-bit ARM port
-#
-# --with-cpu-port=arm64   will use hotspot/src/cpu/arm
-# --with-cpu-port=aarch64 will use hotspot/src/cpu/aarch64
-#
-AC_DEFUN([SETUP_HOTSPOT_TARGET_CPU_PORT],
-[
-  AC_ARG_WITH(cpu-port, [AS_HELP_STRING([--with-cpu-port],
-      [specify sources to use for Hotspot 64-bit ARM port (arm64,aarch64) @<:@aarch64@:>@ ])])
-
-  if test "x$with_cpu_port" != x; then
-    if test "x$OPENJDK_TARGET_CPU" != xaarch64; then
-      AC_MSG_ERROR([--with-cpu-port only available on aarch64])
-    fi
-    if test "x$with_cpu_port" != xarm64 && \
-        test "x$with_cpu_port" != xaarch64; then
-      AC_MSG_ERROR([--with-cpu-port must specify arm64 or aarch64])
-    fi
-    HOTSPOT_TARGET_CPU_PORT="$with_cpu_port"
-  fi
-])
-
-
 ################################################################################
 # Check if gtest should be built
 #
diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js
index 8910c335066..8f413ce121f 100644
--- a/make/conf/jib-profiles.js
+++ b/make/conf/jib-profiles.js
@@ -233,8 +233,7 @@ var getJibProfilesCommon = function (input, data) {
     common.main_profile_names = [
         "linux-x64", "linux-x86", "macosx-x64", "solaris-x64",
         "solaris-sparcv9", "windows-x64", "windows-x86", "windows-aarch64",
-        "linux-aarch64", "linux-arm32", "linux-arm64", "linux-arm-vfp-hflt",
-        "linux-arm-vfp-hflt-dyn"
+        "linux-aarch64", "linux-arm32"
     ];
 
     // These are the base setttings for all the main build profiles.
@@ -465,20 +464,7 @@ var getJibProfilesProfiles = function (input, common, data) {
             dependencies: ["devkit", "build_devkit", "cups"],
             configure_args: [
                 "--openjdk-target=aarch64-linux-gnu", "--with-freetype=bundled",
-                "--disable-warnings-as-errors", "--with-cpu-port=aarch64",
-            ],
-        },
-
-        "linux-arm64": {
-            target_os: "linux",
-            target_cpu: "aarch64",
-            build_cpu: "x64",
-            dependencies: ["devkit", "build_devkit", "cups", "headless_stubs"],
-            configure_args: [
-                "--with-cpu-port=arm64",
-                "--with-jvm-variants=server",
-                "--openjdk-target=aarch64-linux-gnu",
-                "--enable-headless-only"
+                "--disable-warnings-as-errors"
             ],
         },
 
@@ -492,30 +478,7 @@ var getJibProfilesProfiles = function (input, common, data) {
                 "--with-abi-profile=arm-vfp-hflt", "--disable-warnings-as-errors"
             ],
         },
-
-        "linux-arm-vfp-hflt": {
-            target_os: "linux",
-            target_cpu: "arm",
-            build_cpu: "x64",
-            dependencies: ["devkit", "build_devkit", "cups"],
-            configure_args: [
-                "--with-jvm-variants=minimal1,client",
-                "--with-x=" + input.get("devkit", "install_path") + "/arm-linux-gnueabihf/libc/usr/X11R6-PI",
-                "--with-fontconfig=" + input.get("devkit", "install_path") + "/arm-linux-gnueabihf/libc/usr/X11R6-PI",
-                "--openjdk-target=arm-linux-gnueabihf",
-                "--with-abi-profile=arm-vfp-hflt",
-                "--with-freetype=bundled"
-            ],
-        },
-
-        // Special version of the SE profile adjusted to be testable on arm64 hardware.
-        "linux-arm-vfp-hflt-dyn": {
-            configure_args: "--with-stdc++lib=dynamic"
-        }
     };
-    // Let linux-arm-vfp-hflt-dyn inherit everything from linux-arm-vfp-hflt
-    profiles["linux-arm-vfp-hflt-dyn"] = concatObjects(
-        profiles["linux-arm-vfp-hflt-dyn"], profiles["linux-arm-vfp-hflt"]);
 
     // Add the base settings to all the main profiles
     common.main_profile_names.forEach(function (name) {
@@ -642,15 +605,6 @@ var getJibProfilesProfiles = function (input, common, data) {
         },
        "linux-arm32": {
             platform: "linux-arm32",
-        },
-       "linux-arm64": {
-            platform: "linux-arm64-vfp-hflt",
-        },
-        "linux-arm-vfp-hflt": {
-            platform: "linux-arm32-vfp-hflt",
-        },
-        "linux-arm-vfp-hflt-dyn": {
-            platform: "linux-arm32-vfp-hflt-dyn",
         }
     }
     // Generate common artifacts for all main profiles
@@ -864,16 +818,8 @@ var getJibProfilesDependencies = function (input, common) {
         solaris_x64: "SS12u4-Solaris11u1+1.0",
         solaris_sparcv9: "SS12u4-Solaris11u1+1.1",
         windows_x64: "VS2017-15.9.16+1.1",
-        linux_aarch64: (input.profile != null && input.profile.indexOf("arm64") >= 0
-                    ? "gcc-linaro-aarch64-linux-gnu-4.8-2013.11_linux+1.0"
-                    : "gcc8.2.0-Fedora27+1.0"),
-        linux_arm: (input.profile != null && input.profile.indexOf("hflt") >= 0
-                    ? "gcc-linaro-arm-linux-gnueabihf-raspbian-2012.09-20120921_linux+1.0"
-                    : (input.profile != null && input.profile.indexOf("arm32") >= 0
-                       ? "gcc8.2.0-Fedora27+1.0"
-                       : "arm-linaro-4.7+1.0"
-                       )
-                    )
+        linux_aarch64: "gcc8.2.0-Fedora27+1.0",
+        linux_arm: "gcc8.2.0-Fedora27+1.0"
     };
 
     var devkit_platform = (input.target_cpu == "x86"
diff --git a/make/hotspot/lib/CompileJvm.gmk b/make/hotspot/lib/CompileJvm.gmk
index 48e75e241c6..cfee2836785 100644
--- a/make/hotspot/lib/CompileJvm.gmk
+++ b/make/hotspot/lib/CompileJvm.gmk
@@ -62,12 +62,6 @@ ifeq ($(call isTargetCpu, x86_64), true)
   OPENJDK_TARGET_CPU_VM_VERSION := amd64
 else ifeq ($(call isTargetCpu, sparcv9), true)
   OPENJDK_TARGET_CPU_VM_VERSION := sparc
-else ifeq ($(HOTSPOT_TARGET_CPU_ARCH), arm)
-  ifeq ($(OPENJDK_TARGET_CPU), aarch64)
-    # This sets the Oracle Aarch64 port to use arm64
-    # while the original Aarch64 port uses aarch64
-    OPENJDK_TARGET_CPU_VM_VERSION := arm64
-  endif
 else
   OPENJDK_TARGET_CPU_VM_VERSION := $(OPENJDK_TARGET_CPU)
 endif
diff --git a/src/hotspot/cpu/arm/abstractInterpreter_arm.cpp b/src/hotspot/cpu/arm/abstractInterpreter_arm.cpp
index efc233a562e..8ff35363d78 100644
--- a/src/hotspot/cpu/arm/abstractInterpreter_arm.cpp
+++ b/src/hotspot/cpu/arm/abstractInterpreter_arm.cpp
@@ -38,19 +38,6 @@
 int AbstractInterpreter::BasicType_as_index(BasicType type) {
   int i = 0;
   switch (type) {
-#ifdef AARCH64
-    case T_BOOLEAN: i = 0; break;
-    case T_CHAR   : i = 1; break;
-    case T_BYTE   : i = 2; break;
-    case T_SHORT  : i = 3; break;
-    case T_INT    : // fall through
-    case T_LONG   : // fall through
-    case T_VOID   : // fall through
-    case T_FLOAT  : // fall through
-    case T_DOUBLE : i = 4; break;
-    case T_OBJECT : // fall through
-    case T_ARRAY  : i = 5; break;
-#else
     case T_VOID   : i = 0; break;
     case T_BOOLEAN: i = 1; break;
     case T_CHAR   : i = 2; break;
@@ -62,7 +49,6 @@ int AbstractInterpreter::BasicType_as_index(BasicType type) {
     case T_LONG   : i = 7; break;
     case T_FLOAT  : i = 8; break;
     case T_DOUBLE : i = 9; break;
-#endif // AARCH64
     default       : ShouldNotReachHere();
   }
   assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds");
@@ -71,7 +57,7 @@ int AbstractInterpreter::BasicType_as_index(BasicType type) {
 
 // How much stack a method activation needs in words.
 int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
-  const int stub_code = AARCH64_ONLY(24) NOT_AARCH64(12);  // see generate_call_stub
+  const int stub_code = 12;  // see generate_call_stub
   // Save space for one monitor to get into the interpreted method in case
   // the method is synchronized
   int monitor_size    = method->is_synchronized() ?
@@ -108,9 +94,6 @@ int AbstractInterpreter::size_activation(int max_stack,
          (moncount*frame::interpreter_frame_monitor_size()) +
          tempcount*Interpreter::stackElementWords + extra_args;
 
-#ifdef AARCH64
-  size = align_up(size, StackAlignmentInBytes/BytesPerWord);
-#endif // AARCH64
 
   return size;
 }
@@ -146,65 +129,7 @@ void AbstractInterpreter::layout_activation(Method* method,
   // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp)
   // and sender_sp is (fp + sender_sp_offset*wordSize)
 
-#ifdef AARCH64
-  intptr_t* locals;
-  if (caller->is_interpreted_frame()) {
-    // attach locals to the expression stack of caller interpreter frame
-    locals = caller->interpreter_frame_tos_address() + caller_actual_parameters*Interpreter::stackElementWords - 1;
-  } else {
-    assert (is_bottom_frame, "should be");
-    locals = interpreter_frame->fp() + frame::sender_sp_offset + method->max_locals() - 1;
-  }
-
-  if (TraceDeoptimization) {
-    tty->print_cr("layout_activation:");
-
-    if (caller->is_entry_frame()) {
-      tty->print("entry ");
-    }
-    if (caller->is_compiled_frame()) {
-      tty->print("compiled ");
-    }
-    if (caller->is_interpreted_frame()) {
-      tty->print("interpreted ");
-    }
-    tty->print_cr("caller: sp=%p, unextended_sp=%p, fp=%p, pc=%p", caller->sp(), caller->unextended_sp(), caller->fp(), caller->pc());
-    tty->print_cr("interpreter_frame: sp=%p, unextended_sp=%p, fp=%p, pc=%p", interpreter_frame->sp(), interpreter_frame->unextended_sp(), interpreter_frame->fp(), interpreter_frame->pc());
-    tty->print_cr("method: max_locals = %d, size_of_parameters = %d", method->max_locals(), method->size_of_parameters());
-    tty->print_cr("caller_actual_parameters = %d", caller_actual_parameters);
-    tty->print_cr("locals = %p", locals);
-  }
-
-#ifdef ASSERT
-  if (caller_actual_parameters != method->size_of_parameters()) {
-    assert(caller->is_interpreted_frame(), "adjusted caller_actual_parameters, but caller is not interpreter frame");
-    Bytecode_invoke inv(caller->interpreter_frame_method(), caller->interpreter_frame_bci());
-
-    if (is_bottom_frame) {
-      assert(caller_actual_parameters == 0, "invalid adjusted caller_actual_parameters value for bottom frame");
-      assert(inv.is_invokedynamic() || inv.is_invokehandle(), "adjusted caller_actual_parameters for bottom frame, but not invokedynamic/invokehandle");
-    } else {
-      assert(caller_actual_parameters == method->size_of_parameters()+1, "invalid adjusted caller_actual_parameters value");
-      assert(!inv.is_invokedynamic() && MethodHandles::has_member_arg(inv.klass(), inv.name()), "adjusted caller_actual_parameters, but no member arg");
-    }
-  }
-  if (caller->is_interpreted_frame()) {
-    intptr_t* locals_base = (locals - method->max_locals()*Interpreter::stackElementWords + 1);
-    locals_base = align_down(locals_base, StackAlignmentInBytes);
-    assert(interpreter_frame->sender_sp() <= locals_base, "interpreter-to-interpreter frame chaining");
-
-  } else if (caller->is_compiled_frame()) {
-    assert(locals + 1 <= caller->unextended_sp(), "compiled-to-interpreter frame chaining");
-
-  } else {
-    assert(caller->is_entry_frame(), "should be");
-    assert(locals + 1 <= caller->fp(), "entry-to-interpreter frame chaining");
-  }
-#endif // ASSERT
-
-#else
   intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1;
-#endif // AARCH64
 
   interpreter_frame->interpreter_frame_set_locals(locals);
   BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
@@ -215,44 +140,16 @@ void AbstractInterpreter::layout_activation(Method* method,
   intptr_t* stack_top = (intptr_t*) monbot  -
     tempcount*Interpreter::stackElementWords -
     popframe_extra_args;
-#ifdef AARCH64
-  interpreter_frame->interpreter_frame_set_stack_top(stack_top);
-
-  // We have to add extra reserved slots to max_stack. There are 3 users of the extra slots,
-  // none of which are at the same time, so we just need to make sure there is enough room
-  // for the biggest user:
-  //   -reserved slot for exception handler
-  //   -reserved slots for JSR292. Method::extra_stack_entries() is the size.
-  //   -3 reserved slots so get_method_counters() can save some registers before call_VM().
-  int max_stack = method->constMethod()->max_stack() + MAX2(3, Method::extra_stack_entries());
-  intptr_t* extended_sp = (intptr_t*) monbot  -
-    (max_stack * Interpreter::stackElementWords) -
-    popframe_extra_args;
-  extended_sp = align_down(extended_sp, StackAlignmentInBytes);
-  interpreter_frame->interpreter_frame_set_extended_sp(extended_sp);
-#else
   interpreter_frame->interpreter_frame_set_last_sp(stack_top);
-#endif // AARCH64
 
   // All frames but the initial (oldest) interpreter frame we fill in have a
   // value for sender_sp that allows walking the stack but isn't
   // truly correct. Correct the value here.
 
-#ifdef AARCH64
-  if (caller->is_interpreted_frame()) {
-    intptr_t* sender_sp = align_down(caller->interpreter_frame_tos_address(), StackAlignmentInBytes);
-    interpreter_frame->set_interpreter_frame_sender_sp(sender_sp);
-
-  } else {
-    // in case of non-interpreter caller sender_sp of the oldest frame is already
-    // set to valid value
-  }
-#else
   if (extra_locals != 0 &&
       interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) {
     interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals);
   }
-#endif // AARCH64
 
   *interpreter_frame->interpreter_frame_cache_addr() =
     method->constants()->cache();
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index e0437831af0..594697d34e0 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -67,15 +67,10 @@ static inline bool cache_reachable() {
   return MacroAssembler::_cache_fully_reachable();
 }
 
-#ifdef AARCH64
-#define ldr_32 ldr_w
-#define str_32 str_w
-#else
 #define ldr_32 ldr
 #define str_32 str
 #define tst_32 tst
 #define teq_32 teq
-#endif
 #if 1
 extern bool PrintOptoAssembly;
 #endif
@@ -111,12 +106,7 @@ class HandlerImpl {
   static int emit_deopt_handler(CodeBuffer& cbuf);
 
   static uint size_exception_handler() {
-#ifdef AARCH64
-    // ldr_literal; br; (pad); <literal>
-    return 3 * Assembler::InstructionSize + wordSize;
-#else
     return ( 3 * 4 );
-#endif
   }
 
 
@@ -205,9 +195,6 @@ void emit_hi(CodeBuffer &cbuf, int val) {  }
 const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask();
 
 int Compile::ConstantTable::calculate_table_base_offset() const {
-#ifdef AARCH64
-  return 0;
-#else
   int offset = -(size() / 2);
   // flds, fldd: 8-bit  offset multiplied by 4: +/- 1024
   // ldr, ldrb : 12-bit offset:                 +/- 4096
@@ -215,7 +202,6 @@ int Compile::ConstantTable::calculate_table_base_offset() const {
     offset = Assembler::min_simm10;
   }
   return offset;
-#endif
 }
 
 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
@@ -240,11 +226,7 @@ void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 }
 
 uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
-#ifdef AARCH64
-  return 5 * Assembler::InstructionSize;
-#else
   return 8;
-#endif
 }
 
 #ifndef PRODUCT
@@ -262,12 +244,6 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
   for (int i = 0; i < OptoPrologueNops; i++) {
     st->print_cr("NOP"); st->print("\t");
   }
-#ifdef AARCH64
-  if (OptoPrologueNops <= 0) {
-    st->print_cr("NOP\t! required for safe patching");
-    st->print("\t");
-  }
-#endif
 
   size_t framesize = C->frame_size_in_bytes();
   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
@@ -298,11 +274,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
   for (int i = 0; i < OptoPrologueNops; i++) {
     __ nop();
   }
-#ifdef AARCH64
-  if (OptoPrologueNops <= 0) {
-    __ nop(); // required for safe patching by patch_verified_entry()
-  }
-#endif
 
   size_t framesize = C->frame_size_in_bytes();
   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
@@ -361,18 +332,8 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 
   if (do_polling() && ra_->C->is_method_compilation()) {
     st->print("\n\t");
-#ifdef AARCH64
-    if (MacroAssembler::page_reachable_from_cache(os::get_polling_page())) {
-      st->print("ADRP     Rtemp, #PollAddr\t! Load Polling address\n\t");
-      st->print("LDR      ZR,[Rtemp + #PollAddr & 0xfff]\t!Poll for Safepointing");
-    } else {
-      st->print("mov_slow Rtemp, #PollAddr\t! Load Polling address\n\t");
-      st->print("LDR      ZR,[Rtemp]\t!Poll for Safepointing");
-    }
-#else
     st->print("MOV    Rtemp, #PollAddr\t! Load Polling address\n\t");
     st->print("LDR    Rtemp,[Rtemp]\t!Poll for Safepointing");
-#endif
   }
 }
 #endif
@@ -390,36 +351,15 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 
   // If this does safepoint polling, then do it here
   if (do_polling() && ra_->C->is_method_compilation()) {
-#ifdef AARCH64
-    if (false && MacroAssembler::page_reachable_from_cache(os::get_polling_page())) {
-/* FIXME: TODO
-      __ relocate(relocInfo::xxx);
-      __ adrp(Rtemp, (intptr_t)os::get_polling_page());
-      __ relocate(relocInfo::poll_return_type);
-      int offset = os::get_polling_page() & 0xfff;
-      __ ldr(ZR, Address(Rtemp + offset));
-*/
-    } else {
-      __ mov_address(Rtemp, (address)os::get_polling_page(), symbolic_Relocation::polling_page_reference);
-      __ relocate(relocInfo::poll_return_type);
-      __ ldr(ZR, Address(Rtemp));
-    }
-#else
     // mov_slow here is usually one or two instruction
     __ mov_address(Rtemp, (address)os::get_polling_page(), symbolic_Relocation::polling_page_reference);
     __ relocate(relocInfo::poll_return_type);
     __ ldr(Rtemp, Address(Rtemp));
-#endif
   }
 }
 
 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
-#ifdef AARCH64
-  // allow for added alignment nop from mov_address bind_literal
-  return MachNode::size(ra_) + 1 * Assembler::InstructionSize;
-#else
   return MachNode::size(ra_);
-#endif
 }
 
 int MachEpilogNode::reloc() const {
@@ -451,16 +391,12 @@ static enum RC rc_class( OptoReg::Name reg ) {
 }
 
 static inline bool is_iRegLd_memhd(OptoReg::Name src_first, OptoReg::Name src_second, int offset) {
-#ifdef AARCH64
-  return is_memoryHD(offset);
-#else
   int rlo = Matcher::_regEncode[src_first];
   int rhi = Matcher::_regEncode[src_second];
   if (!((rlo&1)==0 && (rlo+1 == rhi))) {
     tty->print_cr("CAUGHT BAD LDRD/STRD");
   }
   return (rlo&1)==0 && (rlo+1 == rhi) && is_memoryHD(offset);
-#endif
 }
 
 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
@@ -549,11 +485,6 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
                 Matcher::regName[src_first]);
 #endif
     }
-#ifdef AARCH64
-    if (src_first+1 == src_second && dst_first+1 == dst_second) {
-      return size + 4;
-    }
-#endif
     size += 4;
   }
 
@@ -722,19 +653,11 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
       assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
       assert(src_second_rc == rc_int && dst_second_rc == rc_float, "unsupported");
       if (cbuf) {
-#ifdef AARCH64
-        __ fmov_dx(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
-#else
         __ fmdrr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]), reg_to_register_object(Matcher::_regEncode[src_second]));
-#endif
 #ifndef PRODUCT
       } else if (!do_size) {
         if (size != 0) st->print("\n\t");
-#ifdef AARCH64
-        st->print("FMOV_DX   R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
-#else
         st->print("FMDRR   R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first), OptoReg::regname(src_second));
-#endif
 #endif
       }
       return size + 4;
@@ -759,19 +682,11 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
       assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
       assert(src_second_rc == rc_float && dst_second_rc == rc_int, "unsupported");
       if (cbuf) {
-#ifdef AARCH64
-        __ fmov_xd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
-#else
         __ fmrrd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
-#endif
 #ifndef PRODUCT
       } else if (!do_size) {
         if (size != 0) st->print("\n\t");
-#ifdef AARCH64
-        st->print("FMOV_XD R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
-#else
         st->print("FMRRD   R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(dst_second), OptoReg::regname(src_first));
-#endif
 #endif
       }
       return size + 4;
@@ -795,7 +710,6 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
     return size;               // Self copy; no move
   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 
-#ifndef AARCH64
   // Check for integer reg-reg copy.  Hi bits are stuck up in the top
   // 32-bits of a 64-bit register, but are needed in low bits of another
   // register (else it's a hi-bits-to-hi-bits copy which should have
@@ -852,7 +766,6 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
     }
     return size + 4;
   }
-#endif
 
   Unimplemented();
   return 0; // Mute compiler
@@ -910,11 +823,7 @@ void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
     __ add(dst, SP, offset);
   } else {
     __ mov_slow(dst, offset);
-#ifdef AARCH64
-    __ add(dst, SP, dst, ex_lsl);
-#else
     __ add(dst, SP, dst);
-#endif
   }
 }
 
@@ -926,11 +835,7 @@ uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 
 //=============================================================================
 #ifndef PRODUCT
-#ifdef AARCH64
-#define R_RTEMP "R_R16"
-#else
 #define R_RTEMP "R_R12"
-#endif
 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
   st->print_cr("\nUEP:");
   if (UseCompressedClassPointers) {
@@ -952,14 +857,7 @@ void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 
   __ load_klass(Rtemp, receiver);
   __ cmp(Rtemp, iCache);
-#ifdef AARCH64
-  Label match;
-  __ b(match, eq);
-  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
-  __ bind(match);
-#else
   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
-#endif
 }
 
 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
@@ -1005,24 +903,12 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
   int offset = __ offset();
   address deopt_pc = __ pc();
 
-#ifdef AARCH64
-  // See LR saved by caller in sharedRuntime_arm.cpp
-  // see also hse1 ws
-  // see also LIR_Assembler::emit_deopt_handler
-
-  __ raw_push(LR, LR); // preserve LR in both slots
-  __ mov_relative_address(LR, deopt_pc);
-  __ str(LR, Address(SP, 1 * wordSize)); // save deopt PC
-  // OK to kill LR, because deopt blob will restore it from SP[0]
-  __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, LR_tmp);
-#else
   __ sub(SP, SP, wordSize); // make room for saved PC
   __ push(LR); // save LR that may be live when we get here
   __ mov_relative_address(LR, deopt_pc);
   __ str(LR, Address(SP, wordSize)); // save deopt PC
   __ pop(LR); // restore LR
   __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg);
-#endif
 
   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
 
@@ -1073,21 +959,13 @@ const bool Matcher::match_rule_supported(int opcode) {
   case Op_AddVF:
   case Op_SubVF:
   case Op_MulVF:
-#ifdef AARCH64
-    return VM_Version::has_simd();
-#else
     return VM_Version::has_vfp() || VM_Version::has_simd();
-#endif
   case Op_AddVD:
   case Op_SubVD:
   case Op_MulVD:
   case Op_DivVF:
   case Op_DivVD:
-#ifdef AARCH64
-    return VM_Version::has_simd();
-#else
     return VM_Version::has_vfp();
-#endif
   }
 
   return true;  // Per default match rules are supported.
@@ -1158,11 +1036,7 @@ const bool Matcher::pass_original_key_for_aes() {
 }
 
 const bool Matcher::convL2FSupported(void) {
-#ifdef AARCH64
-  return true;
-#else
   return false;
-#endif
 }
 
 // Is this branch offset short enough that a short branch can be used?
@@ -1181,29 +1055,17 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 
 const bool Matcher::isSimpleConstant64(jlong value) {
   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
-#ifdef AARCH64
-  return (value == 0);
-#else
   return false;
-#endif
 }
 
 // No scaling for the parameter the ClearArray node.
 const bool Matcher::init_array_count_is_in_bytes = true;
 
-#ifdef AARCH64
-const int Matcher::long_cmove_cost() { return 1; }
-#else
 // Needs 2 CMOV's for longs.
 const int Matcher::long_cmove_cost() { return 2; }
-#endif
 
-#ifdef AARCH64
-const int Matcher::float_cmove_cost() { return 1; }
-#else
 // CMOVF/CMOVD are expensive on ARM.
 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
-#endif
 
 // Does the CPU require late expand (see block.cpp for description of late expand)?
 const bool Matcher::require_postalloc_expand = false;
@@ -1211,11 +1073,7 @@ const bool Matcher::require_postalloc_expand = false;
 // Do we need to mask the count passed to shift instructions or does
 // the cpu only look at the lower 5/6 bits anyway?
 // FIXME: does this handle vector shifts as well?
-#ifdef AARCH64
-const bool Matcher::need_masked_shift_count = false;
-#else
 const bool Matcher::need_masked_shift_count = true;
-#endif
 
 const bool Matcher::convi2l_type_required = true;
 
@@ -1261,14 +1119,7 @@ const bool Matcher::rematerialize_float_constants = false;
 // needed.  Else we split the double into 2 integer pieces and move it
 // piece-by-piece.  Only happens when passing doubles into C code as the
 // Java calling convention forces doubles to be aligned.
-#ifdef AARCH64
-// On stack replacement support:
-// We don't need Load[DL]_unaligned support, because interpreter stack
-// has correct alignment
-const bool Matcher::misaligned_doubles_ok = true;
-#else
 const bool Matcher::misaligned_doubles_ok = false;
-#endif
 
 // No-op on ARM.
 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
@@ -1300,10 +1151,6 @@ const bool Matcher::int_in_long = false;
 // Registers not mentioned will be killed by the VM call in the trampoline, and
 // arguments in those registers not be available to the callee.
 bool Matcher::can_be_java_arg( int reg ) {
-#ifdef AARCH64
-  if (reg >= R_R0_num && reg < R_R8_num) return true;
-  if (reg >= R_V0_num && reg <= R_V7b_num && ((reg & 3) < 2)) return true;
-#else
   if (reg == R_R0_num ||
       reg == R_R1_num ||
       reg == R_R2_num ||
@@ -1311,7 +1158,6 @@ bool Matcher::can_be_java_arg( int reg ) {
 
   if (reg >= R_S0_num &&
       reg <= R_S13_num) return true;
-#endif
   return false;
 }
 
@@ -1454,44 +1300,14 @@ encode %{
     Register R8_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
     assert(R8_ic_reg == Ricklass, "should be");
     __ set_inst_mark();
-#ifdef AARCH64
-// TODO: see C1 LIR_Assembler::ic_call()
-    InlinedAddress oop_literal((address)Universe::non_oop_word());
-    int offset = __ offset();
-    int fixed_size = mov_oop_size * 4;
-    if (VM_Version::prefer_moves_over_load_literal()) {
-      uintptr_t val = (uintptr_t)Universe::non_oop_word();
-      __ movz(R8_ic_reg, (val >>  0) & 0xffff,  0);
-      __ movk(R8_ic_reg, (val >> 16) & 0xffff, 16);
-      __ movk(R8_ic_reg, (val >> 32) & 0xffff, 32);
-      __ movk(R8_ic_reg, (val >> 48) & 0xffff, 48);
-    } else {
-      __ ldr_literal(R8_ic_reg, oop_literal);
-    }
-    assert(__ offset() - offset == fixed_size, "bad mov_oop size");
-#else
     __ movw(R8_ic_reg, ((unsigned int)Universe::non_oop_word()) & 0xffff);
     __ movt(R8_ic_reg, ((unsigned int)Universe::non_oop_word()) >> 16);
-#endif
     address  virtual_call_oop_addr = __ inst_mark();
     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
     // who we intended to call.
     int method_index = resolved_method_index(cbuf);
     __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
     emit_call_reloc(cbuf, as_MachCall(), $meth, RelocationHolder::none);
-#ifdef AARCH64
-    if (!VM_Version::prefer_moves_over_load_literal()) {
-      Label skip_literal;
-      __ b(skip_literal);
-      int off2 = __ offset();
-      __ bind_literal(oop_literal);
-      if (__ offset() - off2 == wordSize) {
-        // no padding, so insert nop for worst-case sizing
-        __ nop();
-      }
-      __ bind(skip_literal);
-    }
-#endif
   %}
 
   enc_class LdReplImmI(immI src, regD dst, iRegI tmp, int cnt, int wth) %{
@@ -1558,16 +1374,8 @@ encode %{
     // See if the lengths are different, and calculate min in str1_reg.
     // Stash diff in tmp2 in case we need it for a tie-breaker.
     __ subs_32(tmp2_reg, cnt1_reg, cnt2_reg);
-#ifdef AARCH64
-    Label Lskip;
-    __ _lsl_w(cnt1_reg, cnt1_reg, exact_log2(sizeof(jchar))); // scale the limit
-    __ b(Lskip, mi);
-    __ _lsl_w(cnt1_reg, cnt2_reg, exact_log2(sizeof(jchar))); // scale the limit
-    __ bind(Lskip);
-#else
     __ mov(cnt1_reg, AsmOperand(cnt1_reg, lsl, exact_log2(sizeof(jchar)))); // scale the limit
     __ mov(cnt1_reg, AsmOperand(cnt2_reg, lsl, exact_log2(sizeof(jchar))), pl); // scale the limit
-#endif
 
     // reallocate cnt1_reg, cnt2_reg, result_reg
     // Note:  limit_reg holds the string length pre-scaled by 2
@@ -1717,16 +1525,6 @@ encode %{
     int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
 
     // return true if the same array
-#ifdef AARCH64
-    __ cmp(ary1_reg, ary2_reg);
-    __ b(Lequal, eq);
-
-    __ mov(result_reg, 0);
-
-    __ cbz(ary1_reg, Ldone); // not equal
-
-    __ cbz(ary2_reg, Ldone); // not equal
-#else
     __ teq(ary1_reg, ary2_reg);
     __ mov(result_reg, 1, eq);
     __ b(Ldone, eq); // equal
@@ -1738,19 +1536,12 @@ encode %{
     __ tst(ary2_reg, ary2_reg);
     __ mov(result_reg, 0, eq);
     __ b(Ldone, eq);    // not equal
-#endif
 
     //load the lengths of arrays
     __ ldr_s32(tmp1_reg, Address(ary1_reg, length_offset)); // int
     __ ldr_s32(tmp2_reg, Address(ary2_reg, length_offset)); // int
 
     // return false if the two arrays are not equal length
-#ifdef AARCH64
-    __ cmp_w(tmp1_reg, tmp2_reg);
-    __ b(Ldone, ne);    // not equal
-
-    __ cbz_w(tmp1_reg, Lequal); // zero-length arrays are equal
-#else
     __ teq_32(tmp1_reg, tmp2_reg);
     __ mov(result_reg, 0, ne);
     __ b(Ldone, ne);    // not equal
@@ -1758,7 +1549,6 @@ encode %{
     __ tst(tmp1_reg, tmp1_reg);
     __ mov(result_reg, 1, eq);
     __ b(Ldone, eq);    // zero-length arrays are equal
-#endif
 
     // load array addresses
     __ add(ary1_reg, ary1_reg, base_offset);
@@ -1852,11 +1642,7 @@ frame %{
   sync_stack_slots(1 * VMRegImpl::slots_per_word);
 
   // Compiled code's Frame Pointer
-#ifdef AARCH64
-  frame_pointer(R_SP);
-#else
   frame_pointer(R_R13);
-#endif
 
   // Stack alignment requirement
   stack_alignment(StackAlignmentInBytes);
@@ -1953,7 +1739,6 @@ operand immI16() %{
   interface(CONST_INTER);
 %}
 
-#ifndef AARCH64
 // Integer Immediate: offset for half and double word loads and stores
 operand immIHD() %{
   predicate(is_memoryHD(n->get_int()));
@@ -1972,7 +1757,6 @@ operand immIFP() %{
   format %{ %}
   interface(CONST_INTER);
 %}
-#endif
 
 // Valid scale values for addressing modes and shifts
 operand immU5() %{
@@ -2183,45 +1967,6 @@ operand limmIn() %{
   interface(CONST_INTER);
 %}
 
-#ifdef AARCH64
-// Long Immediate: for logical instruction
-operand limmL() %{
-  predicate(is_limmL(n->get_long()));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand limmLn() %{
-  predicate(is_limmL(~n->get_long()));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Long Immediate: for arithmetic instruction
-operand aimmL() %{
-  predicate(is_aimm(n->get_long()));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand aimmLneg() %{
-  predicate(is_aimm(-n->get_long()));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-#endif // AARCH64
 
 // Long Immediate: the value FF
 operand immL_FF() %{
@@ -2404,11 +2149,7 @@ operand iRegI() %{
   match(R1RegI);
   match(R2RegI);
   match(R3RegI);
-#ifdef AARCH64
-  match(ZRRegI);
-#else
   match(R12RegI);
-#endif
 
   format %{ %}
   interface(REG_INTER);
@@ -2446,49 +2187,6 @@ operand sp_ptr_RegP() %{
   interface(REG_INTER);
 %}
 
-#ifdef AARCH64
-// Like sp_ptr_reg, but exclude regs (Aarch64 SP) that can't be
-// stored directly.  Includes ZR, so can't be used as a destination.
-operand store_ptr_RegP() %{
-  constraint(ALLOC_IN_RC(store_ptr_reg));
-  match(RegP);
-  match(iRegP);
-  match(ZRRegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand store_RegI() %{
-  constraint(ALLOC_IN_RC(store_reg));
-  match(RegI);
-  match(iRegI);
-  match(ZRRegI);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand store_RegL() %{
-  constraint(ALLOC_IN_RC(store_ptr_reg));
-  match(RegL);
-  match(iRegL);
-  match(ZRRegL);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand store_RegN() %{
-  constraint(ALLOC_IN_RC(store_reg));
-  match(RegN);
-  match(iRegN);
-  match(ZRRegN);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-#endif
 
 operand R0RegP() %{
   constraint(ALLOC_IN_RC(R0_regP));
@@ -2578,7 +2276,6 @@ operand R3RegI() %{
   interface(REG_INTER);
 %}
 
-#ifndef AARCH64
 operand R12RegI() %{
   constraint(ALLOC_IN_RC(R12_regI));
   match(iRegI);
@@ -2586,18 +2283,13 @@ operand R12RegI() %{
   format %{ %}
   interface(REG_INTER);
 %}
-#endif
 
 // Long Register
 operand iRegL() %{
   constraint(ALLOC_IN_RC(long_reg));
   match(RegL);
-#ifdef AARCH64
-  match(iRegLd);
-#else
   match(R0R1RegL);
   match(R2R3RegL);
-#endif
 //match(iRegLex);
 
   format %{ %}
@@ -2612,7 +2304,6 @@ operand iRegLd() %{
   interface(REG_INTER);
 %}
 
-#ifndef AARCH64
 // first long arg, or return value
 operand R0R1RegL() %{
   constraint(ALLOC_IN_RC(R0R1_regL));
@@ -2629,7 +2320,6 @@ operand R2R3RegL() %{
   format %{ %}
   interface(REG_INTER);
 %}
-#endif
 
 // Condition Code Flag Register
 operand flagsReg() %{
@@ -2671,7 +2361,6 @@ operand flagsRegP() %{
 %}
 
 // Condition Code Register, long comparisons.
-#ifndef AARCH64
 operand flagsRegL_LTGE() %{
   constraint(ALLOC_IN_RC(int_flags));
   match(RegFlags);
@@ -2719,7 +2408,6 @@ operand flagsRegUL_LEGT() %{
   format %{ "apsr_UL_LEGT" %}
   interface(REG_INTER);
 %}
-#endif
 
 // Condition Code Register, floating comparisons, unordered same as "less".
 operand flagsRegF() %{
@@ -2800,113 +2488,12 @@ operand indirect(sp_ptr_RegP reg) %{
   format %{ "[$reg]" %}
   interface(MEMORY_INTER) %{
     base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
     index(0xf); // PC => no index
-#endif
     scale(0x0);
     disp(0x0);
   %}
 %}
 
-#ifdef AARCH64
-// Indirect with scaled*1 uimm12 offset
-operand indOffsetU12ScaleB(sp_ptr_RegP reg, immUL12 offset) %{
-  constraint(ALLOC_IN_RC(sp_ptr_reg));
-  match(AddP reg offset);
-
-  op_cost(100);
-  format %{ "[$reg + $offset]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
-    index(0xf); // PC => no index
-#endif
-    scale(0x0);
-    disp($offset);
-  %}
-%}
-
-// Indirect with scaled*2 uimm12 offset
-operand indOffsetU12ScaleS(sp_ptr_RegP reg, immUL12x2 offset) %{
-  constraint(ALLOC_IN_RC(sp_ptr_reg));
-  match(AddP reg offset);
-
-  op_cost(100);
-  format %{ "[$reg + $offset]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
-    index(0xf); // PC => no index
-#endif
-    scale(0x0);
-    disp($offset);
-  %}
-%}
-
-// Indirect with scaled*4 uimm12 offset
-operand indOffsetU12ScaleI(sp_ptr_RegP reg, immUL12x4 offset) %{
-  constraint(ALLOC_IN_RC(sp_ptr_reg));
-  match(AddP reg offset);
-
-  op_cost(100);
-  format %{ "[$reg + $offset]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
-    index(0xf); // PC => no index
-#endif
-    scale(0x0);
-    disp($offset);
-  %}
-%}
-
-// Indirect with scaled*8 uimm12 offset
-operand indOffsetU12ScaleL(sp_ptr_RegP reg, immUL12x8 offset) %{
-  constraint(ALLOC_IN_RC(sp_ptr_reg));
-  match(AddP reg offset);
-
-  op_cost(100);
-  format %{ "[$reg + $offset]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
-    index(0xf); // PC => no index
-#endif
-    scale(0x0);
-    disp($offset);
-  %}
-%}
-
-// Indirect with scaled*16 uimm12 offset
-operand indOffsetU12ScaleQ(sp_ptr_RegP reg, immUL12x16 offset) %{
-  constraint(ALLOC_IN_RC(sp_ptr_reg));
-  match(AddP reg offset);
-
-  op_cost(100);
-  format %{ "[$reg + $offset]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
-    index(0xf); // PC => no index
-#endif
-    scale(0x0);
-    disp($offset);
-  %}
-%}
-
-#else // ! AARCH64
 
 // Indirect with Offset in ]-4096, 4096[
 operand indOffset12(sp_ptr_RegP reg, immI12 offset) %{
@@ -2917,11 +2504,7 @@ operand indOffset12(sp_ptr_RegP reg, immI12 offset) %{
   format %{ "[$reg + $offset]" %}
   interface(MEMORY_INTER) %{
     base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
     index(0xf); // PC => no index
-#endif
     scale(0x0);
     disp($offset);
   %}
@@ -2936,11 +2519,7 @@ operand indOffsetFP(sp_ptr_RegP reg, immIFP offset) %{
   format %{ "[$reg + $offset]" %}
   interface(MEMORY_INTER) %{
     base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
     index(0xf); // PC => no index
-#endif
     scale(0x0);
     disp($offset);
   %}
@@ -2955,11 +2534,7 @@ operand indOffsetHD(sp_ptr_RegP reg, immIHD offset) %{
   format %{ "[$reg + $offset]" %}
   interface(MEMORY_INTER) %{
     base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
     index(0xf); // PC => no index
-#endif
     scale(0x0);
     disp($offset);
   %}
@@ -2974,11 +2549,7 @@ operand indOffsetFPx2(sp_ptr_RegP reg, immX10x2 offset) %{
   format %{ "[$reg + $offset]" %}
   interface(MEMORY_INTER) %{
     base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
     index(0xf); // PC => no index
-#endif
     scale(0x0);
     disp($offset);
   %}
@@ -2993,16 +2564,11 @@ operand indOffset12x2(sp_ptr_RegP reg, immI12x2 offset) %{
   format %{ "[$reg + $offset]" %}
   interface(MEMORY_INTER) %{
     base($reg);
-#ifdef AARCH64
-    index(0xff); // 0xff => no index
-#else
     index(0xf); // PC => no index
-#endif
     scale(0x0);
     disp($offset);
   %}
 %}
-#endif // !AARCH64
 
 // Indirect with Register Index
 operand indIndex(iRegP addr, iRegX index) %{
@@ -3019,9 +2585,8 @@ operand indIndex(iRegP addr, iRegX index) %{
   %}
 %}
 
-#ifdef AARCH64
 // Indirect Memory Times Scale Plus Index Register
-operand indIndexScaleS(iRegP addr, iRegX index, immI_1 scale) %{
+operand indIndexScale(iRegP addr, iRegX index, immU5 scale) %{
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP addr (LShiftX index scale));
 
@@ -3035,130 +2600,9 @@ operand indIndexScaleS(iRegP addr, iRegX index, immI_1 scale) %{
   %}
 %}
 
-// Indirect Memory Times Scale Plus 32-bit Index Register
-operand indIndexIScaleS(iRegP addr, iRegI index, immI_1 scale) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP addr (LShiftX (ConvI2L index) scale));
-
-  op_cost(100);
-  format %{"[$addr + $index.w << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($addr);
-    index($index);
-    scale($scale);
-    disp(0x7fffffff); // sxtw
-  %}
-%}
-
-// Indirect Memory Times Scale Plus Index Register
-operand indIndexScaleI(iRegP addr, iRegX index, immI_2 scale) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP addr (LShiftX index scale));
-
-  op_cost(100);
-  format %{"[$addr + $index << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($addr);
-    index($index);
-    scale($scale);
-    disp(0x0);
-  %}
-%}
-
-// Indirect Memory Times Scale Plus 32-bit Index Register
-operand indIndexIScaleI(iRegP addr, iRegI index, immI_2 scale) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP addr (LShiftX (ConvI2L index) scale));
-
-  op_cost(100);
-  format %{"[$addr + $index.w << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($addr);
-    index($index);
-    scale($scale);
-    disp(0x7fffffff); // sxtw
-  %}
-%}
-
-// Indirect Memory Times Scale Plus Index Register
-operand indIndexScaleL(iRegP addr, iRegX index, immI_3 scale) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP addr (LShiftX index scale));
-
-  op_cost(100);
-  format %{"[$addr + $index << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($addr);
-    index($index);
-    scale($scale);
-    disp(0x0);
-  %}
-%}
-
-// Indirect Memory Times Scale Plus 32-bit Index Register
-operand indIndexIScaleL(iRegP addr, iRegI index, immI_3 scale) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP addr (LShiftX (ConvI2L index) scale));
-
-  op_cost(100);
-  format %{"[$addr + $index.w << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($addr);
-    index($index);
-    scale($scale);
-    disp(0x7fffffff); // sxtw
-  %}
-%}
-
-// Indirect Memory Times Scale Plus Index Register
-operand indIndexScaleQ(iRegP addr, iRegX index, immI_4 scale) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP addr (LShiftX index scale));
-
-  op_cost(100);
-  format %{"[$addr + $index << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($addr);
-    index($index);
-    scale($scale);
-    disp(0x0);
-  %}
-%}
-
-// Indirect Memory Times Scale Plus 32-bit Index Register
-operand indIndexIScaleQ(iRegP addr, iRegI index, immI_4 scale) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP addr (LShiftX (ConvI2L index) scale));
-
-  op_cost(100);
-  format %{"[$addr + $index.w << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($addr);
-    index($index);
-    scale($scale);
-    disp(0x7fffffff); // sxtw
-  %}
-%}
-#else
-// Indirect Memory Times Scale Plus Index Register
-operand indIndexScale(iRegP addr, iRegX index, immU5 scale) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP addr (LShiftX index scale));
-
-  op_cost(100);
-  format %{"[$addr + $index << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($addr);
-    index($index);
-    scale($scale);
-    disp(0x0);
-  %}
-%}
-#endif
-
-// Operands for expressing Control Flow
-// NOTE:  Label is a predefined operand which should not be redefined in
-//        the AD file.  It is generically handled within the ADLC.
+// Operands for expressing Control Flow
+// NOTE:  Label is a predefined operand which should not be redefined in
+//        the AD file.  It is generically handled within the ADLC.
 
 //----------Conditional Branch Operands----------------------------------------
 // Comparison Op  - This is the operation of the comparison, and is limited to
@@ -3312,29 +2756,6 @@ operand cmpOpUL_commute() %{
 // instructions for every form of operand when the instruction accepts
 // multiple operand types with the same basic encoding and format.  The classic
 // case of this is memory operands.
-#ifdef AARCH64
-opclass memoryB(indirect, indIndex, indOffsetU12ScaleB);
-opclass memoryS(indirect, indIndex, indIndexScaleS, indIndexIScaleS, indOffsetU12ScaleS);
-opclass memoryI(indirect, indIndex, indIndexScaleI, indIndexIScaleI, indOffsetU12ScaleI);
-opclass memoryL(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL);
-opclass memoryP(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL);
-opclass memoryQ(indirect, indIndex, indIndexScaleQ, indIndexIScaleQ, indOffsetU12ScaleQ);
-opclass memoryF(indirect, indIndex, indIndexScaleI, indIndexIScaleI, indOffsetU12ScaleI);
-opclass memoryD(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL);
-
-opclass memoryScaledS(indIndexScaleS, indIndexIScaleS);
-opclass memoryScaledI(indIndexScaleI, indIndexIScaleI);
-opclass memoryScaledL(indIndexScaleL, indIndexIScaleL);
-opclass memoryScaledP(indIndexScaleL, indIndexIScaleL);
-opclass memoryScaledQ(indIndexScaleQ, indIndexIScaleQ);
-opclass memoryScaledF(indIndexScaleI, indIndexIScaleI);
-opclass memoryScaledD(indIndexScaleL, indIndexIScaleL);
-// when ldrex/strex is used:
-opclass memoryex ( indirect );
-opclass indIndexMemory( indIndex );
-opclass memoryvld ( indirect /* , write back mode not implemented */ );
-
-#else
 
 opclass memoryI ( indirect, indOffset12, indIndex, indIndexScale );
 opclass memoryP ( indirect, indOffset12, indIndex, indIndexScale );
@@ -3354,7 +2775,6 @@ opclass memoryex ( indirect );
 opclass indIndexMemory( indIndex );
 opclass memorylong ( indirect, indOffset12x2 );
 opclass memoryvld ( indirect /* , write back mode not implemented */ );
-#endif
 
 //----------PIPELINE-----------------------------------------------------------
 pipeline %{
@@ -4163,7 +3583,6 @@ instruct loadB(iRegI dst, memoryB mem) %{
   size(4);
   format %{ "LDRSB   $dst,$mem\t! byte -> int" %}
   ins_encode %{
-    // High 32 bits are harmlessly set on Aarch64
     __ ldrsb($dst$$Register, $mem$$Address);
   %}
   ins_pipe(iload_mask_mem);
@@ -4174,13 +3593,6 @@ instruct loadB2L(iRegL dst, memoryB mem) %{
   match(Set dst (ConvI2L (LoadB mem)));
   ins_cost(MEMORY_REF_COST);
 
-#ifdef AARCH64
-  size(4);
-  format %{ "LDRSB $dst,$mem\t! byte -> long"  %}
-  ins_encode %{
-    __ ldrsb($dst$$Register, $mem$$Address);
-  %}
-#else
   size(8);
   format %{ "LDRSB $dst.lo,$mem\t! byte -> long\n\t"
             "ASR   $dst.hi,$dst.lo,31" %}
@@ -4188,7 +3600,6 @@ instruct loadB2L(iRegL dst, memoryB mem) %{
     __ ldrsb($dst$$Register, $mem$$Address);
     __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31));
   %}
-#endif
   ins_pipe(iload_mask_mem);
 %}
 
@@ -4210,13 +3621,6 @@ instruct loadUB2L(iRegL dst, memoryB mem) %{
   match(Set dst (ConvI2L (LoadUB mem)));
   ins_cost(MEMORY_REF_COST);
 
-#ifdef AARCH64
-  size(4);
-  format %{ "LDRB  $dst,$mem\t! ubyte -> long"  %}
-  ins_encode %{
-    __ ldrb($dst$$Register, $mem$$Address);
-  %}
-#else
   size(8);
   format %{ "LDRB  $dst.lo,$mem\t! ubyte -> long\n\t"
             "MOV   $dst.hi,0" %}
@@ -4224,7 +3628,6 @@ instruct loadUB2L(iRegL dst, memoryB mem) %{
     __ ldrb($dst$$Register, $mem$$Address);
     __ mov($dst$$Register->successor(), 0);
   %}
-#endif
   ins_pipe(iload_mem);
 %}
 
@@ -4232,16 +3635,6 @@ instruct loadUB2L(iRegL dst, memoryB mem) %{
 instruct loadUB2L_limmI(iRegL dst, memoryB mem, limmIlow8 mask) %{
   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 
-#ifdef AARCH64
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
-  size(8);
-  format %{ "LDRB  $dst,$mem\t! ubyte -> long\n\t"
-            "AND  $dst,$dst,$mask" %}
-  ins_encode %{
-    __ ldrb($dst$$Register, $mem$$Address);
-    __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8));
-  %}
-#else
   ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
   size(12);
   format %{ "LDRB  $dst.lo,$mem\t! ubyte -> long\n\t"
@@ -4252,29 +3645,10 @@ instruct loadUB2L_limmI(iRegL dst, memoryB mem, limmIlow8 mask) %{
     __ mov($dst$$Register->successor(), 0);
     __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8));
   %}
-#endif
   ins_pipe(iload_mem);
 %}
 
 // Load Short (16bit signed)
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct loadSoff(iRegI dst, memoryScaledS mem, aimmX off, iRegP tmp) %{
-  match(Set dst (LoadS (AddP mem off)));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "LDRSH   $dst,$mem+$off\t! short temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ ldrsh($dst$$Register, nmem);
-  %}
-  ins_pipe(iload_mask_mem);
-%}
-#endif
 
 instruct loadS(iRegI dst, memoryS mem) %{
   match(Set dst (LoadS mem));
@@ -4297,7 +3671,6 @@ instruct loadS2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{
 
   format %{ "LDRSB   $dst,$mem\t! short -> byte" %}
   ins_encode %{
-    // High 32 bits are harmlessly set on Aarch64
     __ ldrsb($dst$$Register, $mem$$Address);
   %}
   ins_pipe(iload_mask_mem);
@@ -4308,13 +3681,6 @@ instruct loadS2L(iRegL dst, memoryS mem) %{
   match(Set dst (ConvI2L (LoadS mem)));
   ins_cost(MEMORY_REF_COST);
 
-#ifdef AARCH64
-  size(4);
-  format %{ "LDRSH $dst,$mem\t! short -> long"  %}
-  ins_encode %{
-    __ ldrsh($dst$$Register, $mem$$Address);
-  %}
-#else
   size(8);
   format %{ "LDRSH $dst.lo,$mem\t! short -> long\n\t"
             "ASR   $dst.hi,$dst.lo,31" %}
@@ -4322,30 +3688,11 @@ instruct loadS2L(iRegL dst, memoryS mem) %{
     __ ldrsh($dst$$Register, $mem$$Address);
     __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31));
   %}
-#endif
   ins_pipe(iload_mask_mem);
 %}
 
 // Load Unsigned Short/Char (16bit UNsigned)
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct loadUSoff(iRegI dst, memoryScaledS mem, aimmX off, iRegP tmp) %{
-  match(Set dst (LoadUS (AddP mem off)));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "LDRH   $dst,$mem+$off\t! ushort/char temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ ldrh($dst$$Register, nmem);
-  %}
-  ins_pipe(iload_mem);
-%}
-#endif
 
 instruct loadUS(iRegI dst, memoryS mem) %{
   match(Set dst (LoadUS mem));
@@ -4377,13 +3724,6 @@ instruct loadUS2L(iRegL dst, memoryS mem) %{
   match(Set dst (ConvI2L (LoadUS mem)));
   ins_cost(MEMORY_REF_COST);
 
-#ifdef AARCH64
-  size(4);
-  format %{ "LDRH  $dst,$mem\t! short -> long"  %}
-  ins_encode %{
-    __ ldrh($dst$$Register, $mem$$Address);
-  %}
-#else
   size(8);
   format %{ "LDRH  $dst.lo,$mem\t! short -> long\n\t"
             "MOV   $dst.hi, 0" %}
@@ -4391,7 +3731,6 @@ instruct loadUS2L(iRegL dst, memoryS mem) %{
     __ ldrh($dst$$Register, $mem$$Address);
     __ mov($dst$$Register->successor(), 0);
   %}
-#endif
   ins_pipe(iload_mem);
 %}
 
@@ -4400,13 +3739,6 @@ instruct loadUS2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
   ins_cost(MEMORY_REF_COST);
 
-#ifdef AARCH64
-  size(4);
-  format %{ "LDRB  $dst,$mem"  %}
-  ins_encode %{
-    __ ldrb($dst$$Register, $mem$$Address);
-  %}
-#else
   size(8);
   format %{ "LDRB  $dst.lo,$mem\t! \n\t"
             "MOV   $dst.hi, 0" %}
@@ -4414,24 +3746,12 @@ instruct loadUS2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
     __ ldrb($dst$$Register, $mem$$Address);
     __ mov($dst$$Register->successor(), 0);
   %}
-#endif
   ins_pipe(iload_mem);
 %}
 
 // Load Unsigned Short/Char (16bit UNsigned) with a immediate mask into a Long Register
 instruct loadUS2L_limmI(iRegL dst, memoryS mem, limmI mask) %{
   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
-#ifdef AARCH64
-  ins_cost(MEMORY_REF_COST + 1*DEFAULT_COST);
-
-  size(8);
-  format %{ "LDRH   $dst,$mem\t! ushort/char & mask -> long\n\t"
-            "AND    $dst,$dst,$mask" %}
-  ins_encode %{
-    __ ldrh($dst$$Register, $mem$$Address);
-    __ andr($dst$$Register, $dst$$Register, (uintx)$mask$$constant);
-  %}
-#else
   ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);
 
   size(12);
@@ -4443,30 +3763,11 @@ instruct loadUS2L_limmI(iRegL dst, memoryS mem, limmI mask) %{
     __ mov($dst$$Register->successor(), 0);
     __ andr($dst$$Register, $dst$$Register, $mask$$constant);
   %}
-#endif
   ins_pipe(iload_mem);
 %}
 
 // Load Integer
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct loadIoff(iRegI dst, memoryScaledI mem, aimmX off, iRegP tmp) %{
-  match(Set dst (LoadI (AddP mem off)));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "ldr_s32 $dst,$mem+$off\t! int temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ ldr_s32($dst$$Register, nmem);
-  %}
-  ins_pipe(iload_mem);
-%}
-#endif
 
 instruct loadI(iRegI dst, memoryI mem) %{
   match(Set dst (LoadI mem));
@@ -4537,15 +3838,6 @@ instruct loadI2US(iRegI dst, memoryS mem, immI_65535 mask) %{
 // Load Integer into a Long Register
 instruct loadI2L(iRegL dst, memoryI mem) %{
   match(Set dst (ConvI2L (LoadI mem)));
-#ifdef AARCH64
-  ins_cost(MEMORY_REF_COST);
-
-  size(4);
-  format %{ "LDRSW $dst.lo,$mem\t! int -> long"  %}
-  ins_encode %{
-    __ ldr_s32($dst$$Register, $mem$$Address);
-  %}
-#else
   ins_cost(MEMORY_REF_COST);
 
   size(8);
@@ -4555,22 +3847,12 @@ instruct loadI2L(iRegL dst, memoryI mem) %{
     __ ldr($dst$$Register, $mem$$Address);
     __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31));
   %}
-#endif
   ins_pipe(iload_mask_mem);
 %}
 
 // Load Integer with mask 0xFF into a Long Register
 instruct loadI2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
-#ifdef AARCH64
-  ins_cost(MEMORY_REF_COST);
-
-  size(4);
-  format %{ "LDRB   $dst.lo,$mem\t! int & 0xFF -> long"  %}
-  ins_encode %{
-    __ ldrb($dst$$Register, $mem$$Address);
-  %}
-#else
   ins_cost(MEMORY_REF_COST);
 
   size(8);
@@ -4580,7 +3862,6 @@ instruct loadI2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{
     __ ldrb($dst$$Register, $mem$$Address);
     __ mov($dst$$Register->successor(), 0);
   %}
-#endif
   ins_pipe(iload_mem);
 %}
 
@@ -4589,13 +3870,6 @@ instruct loadI2L_immI_65535(iRegL dst, memoryS mem, immI_65535 mask) %{
   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
   ins_cost(MEMORY_REF_COST);
 
-#ifdef AARCH64
-  size(4);
-  format %{ "LDRH   $dst,$mem\t! int & 0xFFFF -> long" %}
-  ins_encode %{
-    __ ldrh($dst$$Register, $mem$$Address);
-  %}
-#else
   size(8);
   format %{ "LDRH   $dst,$mem\t! int & 0xFFFF -> long\n\t"
             "MOV    $dst.hi, 0" %}
@@ -4603,27 +3877,9 @@ instruct loadI2L_immI_65535(iRegL dst, memoryS mem, immI_65535 mask) %{
     __ ldrh($dst$$Register, $mem$$Address);
     __ mov($dst$$Register->successor(), 0);
   %}
-#endif
   ins_pipe(iload_mask_mem);
 %}
 
-#ifdef AARCH64
-// Load Integer with an immediate mask into a Long Register
-instruct loadI2L_limmI(iRegL dst, memoryI mem, limmI mask) %{
-  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
-  ins_cost(MEMORY_REF_COST + 1*DEFAULT_COST);
-
-  size(8);
-  format %{ "LDRSW $dst,$mem\t! int -> long\n\t"
-            "AND   $dst,$dst,$mask" %}
-
-  ins_encode %{
-    __ ldr_s32($dst$$Register, $mem$$Address);
-    __ andr($dst$$Register, $dst$$Register, (uintx)$mask$$constant);
-  %}
-  ins_pipe(iload_mem);
-%}
-#else
 // Load Integer with a 31-bit immediate mask into a Long Register
 instruct loadI2L_limmU31(iRegL dst, memoryI mem, limmU31 mask) %{
   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
@@ -4641,27 +3897,7 @@ instruct loadI2L_limmU31(iRegL dst, memoryI mem, limmU31 mask) %{
   %}
   ins_pipe(iload_mem);
 %}
-#endif
-
-#ifdef AARCH64
-// Load Integer with mask into a Long Register
-// FIXME: use signedRegI mask, remove tmp?
-instruct loadI2L_immI(iRegL dst, memoryI mem, immI mask, iRegI tmp) %{
-  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
-  effect(TEMP dst, TEMP tmp);
 
-  ins_cost(MEMORY_REF_COST + 3*DEFAULT_COST);
-  format %{ "LDRSW    $mem,$dst\t! int & 31-bit mask -> long\n\t"
-            "MOV_SLOW $tmp,$mask\n\t"
-            "AND      $dst,$tmp,$dst" %}
-  ins_encode %{
-    __ ldrsw($dst$$Register, $mem$$Address);
-    __ mov_slow($tmp$$Register, $mask$$constant);
-    __ andr($dst$$Register, $dst$$Register, $tmp$$Register);
-  %}
-  ins_pipe(iload_mem);
-%}
-#else
 // Load Integer with a 31-bit mask into a Long Register
 // FIXME: use iRegI mask, remove tmp?
 instruct loadI2L_immU31(iRegL dst, memoryI mem, immU31 mask, iRegI tmp) %{
@@ -4682,20 +3918,12 @@ instruct loadI2L_immU31(iRegL dst, memoryI mem, immU31 mask, iRegI tmp) %{
   %}
   ins_pipe(iload_mem);
 %}
-#endif
 
 // Load Unsigned Integer into a Long Register
 instruct loadUI2L(iRegL dst, memoryI mem, immL_32bits mask) %{
   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
   ins_cost(MEMORY_REF_COST);
 
-#ifdef AARCH64
-//size(4);
-  format %{ "LDR_w $dst,$mem\t! uint -> long" %}
-  ins_encode %{
-    __ ldr_w($dst$$Register, $mem$$Address);
-  %}
-#else
   size(8);
   format %{ "LDR   $dst.lo,$mem\t! uint -> long\n\t"
             "MOV   $dst.hi,0" %}
@@ -4703,37 +3931,14 @@ instruct loadUI2L(iRegL dst, memoryI mem, immL_32bits mask) %{
     __ ldr($dst$$Register, $mem$$Address);
     __ mov($dst$$Register->successor(), 0);
   %}
-#endif
   ins_pipe(iload_mem);
 %}
 
 // Load Long
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct loadLoff(iRegLd dst, memoryScaledL mem, aimmX off, iRegP tmp) %{
-  match(Set dst (LoadL (AddP mem off)));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "LDR    $dst,$mem+$off\t! long temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ ldr($dst$$Register, nmem);
-  %}
-  ins_pipe(iload_mem);
-%}
-#endif
 
 instruct loadL(iRegLd dst, memoryL mem ) %{
-#ifdef AARCH64
-  // already atomic for Aarch64
-#else
   predicate(!((LoadLNode*)n)->require_atomic_access());
-#endif
   match(Set dst (LoadL mem));
   effect(TEMP dst);
   ins_cost(MEMORY_REF_COST);
@@ -4746,7 +3951,6 @@ instruct loadL(iRegLd dst, memoryL mem ) %{
   ins_pipe(iload_mem);
 %}
 
-#ifndef AARCH64
 instruct loadL_2instr(iRegL dst, memorylong mem ) %{
   predicate(!((LoadLNode*)n)->require_atomic_access());
   match(Set dst (LoadL mem));
@@ -4822,7 +4026,6 @@ instruct loadL_unaligned(iRegL dst, memorylong mem ) %{
   %}
   ins_pipe(iload_mem);
 %}
-#endif // !AARCH64
 
 // Load Range
 instruct loadRange(iRegI dst, memoryI mem) %{
@@ -4839,31 +4042,13 @@ instruct loadRange(iRegI dst, memoryI mem) %{
 
 // Load Pointer
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct loadPoff(iRegP dst, memoryScaledP mem, aimmX off, iRegP tmp) %{
-  match(Set dst (LoadP (AddP mem off)));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
 
-  format %{ "LDR    $dst,$mem+$off\t! ptr temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ ldr($dst$$Register, nmem);
-  %}
-  ins_pipe(iload_mem);
-%}
-#endif
-
-instruct loadP(iRegP dst, memoryP mem) %{
-  match(Set dst (LoadP mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-
-  format %{ "LDR   $dst,$mem\t! ptr" %}
+instruct loadP(iRegP dst, memoryP mem) %{
+  match(Set dst (LoadP mem));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "LDR   $dst,$mem\t! ptr" %}
   ins_encode %{
     __ ldr($dst$$Register, $mem$$Address);
   %}
@@ -4950,24 +4135,6 @@ instruct loadNKlass(iRegN dst, memoryI mem) %{
 %}
 #endif
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct loadDoff(regD dst, memoryScaledD mem, aimmX off, iRegP tmp) %{
-  match(Set dst (LoadD (AddP mem off)));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "ldr    $dst,$mem+$off\t! double temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ ldr_d($dst$$FloatRegister, nmem);
-  %}
-  ins_pipe(floadD_mem);
-%}
-#endif
 
 instruct loadD(regD dst, memoryD mem) %{
   match(Set dst (LoadD mem));
@@ -4983,7 +4150,6 @@ instruct loadD(regD dst, memoryD mem) %{
   ins_pipe(floadD_mem);
 %}
 
-#ifndef AARCH64
 // Load Double - UNaligned
 instruct loadD_unaligned(regD_low dst, memoryF2 mem ) %{
   match(Set dst (LoadD_unaligned mem));
@@ -4999,26 +4165,7 @@ instruct loadD_unaligned(regD_low dst, memoryF2 mem ) %{
   %}
   ins_pipe(iload_mem);
 %}
-#endif
-
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct loadFoff(regF dst, memoryScaledF mem, aimmX off, iRegP tmp) %{
-  match(Set dst (LoadF (AddP mem off)));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
 
-  format %{ "ldr    $dst,$mem+$off\t! float temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ ldr_s($dst$$FloatRegister, nmem);
-  %}
-  ins_pipe(floadF_mem);
-%}
-#endif
 
 instruct loadF(regF dst, memoryF mem) %{
   match(Set dst (LoadF mem));
@@ -5032,17 +4179,6 @@ instruct loadF(regF dst, memoryF mem) %{
   ins_pipe(floadF_mem);
 %}
 
-#ifdef AARCH64
-instruct load_limmI(iRegI dst, limmI src) %{
-  match(Set dst src);
-  ins_cost(DEFAULT_COST + 1); // + 1 because MOV is preferred
-  format %{ "ORR_w  $dst, ZR, $src\t! int"  %}
-  ins_encode %{
-    __ orr_w($dst$$Register, ZR, (uintx)$src$$constant);
-  %}
-  ins_pipe(ialu_imm);
-%}
-#endif
 
 // // Load Constant
 instruct loadConI( iRegI dst, immI src ) %{
@@ -5065,7 +4201,6 @@ instruct loadConIMov( iRegI dst, immIMov src ) %{
   ins_pipe(ialu_imm);
 %}
 
-#ifndef AARCH64
 instruct loadConIMovn( iRegI dst, immIRotn src ) %{
   match(Set dst src);
   size(4);
@@ -5075,22 +4210,13 @@ instruct loadConIMovn( iRegI dst, immIRotn src ) %{
   %}
   ins_pipe(ialu_imm_n);
 %}
-#endif
 
 instruct loadConI16( iRegI dst, immI16 src ) %{
   match(Set dst src);
   size(4);
-#ifdef AARCH64
-  format %{ "MOVZ_w  $dst, $src" %}
-#else
   format %{ "MOVW    $dst, $src" %}
-#endif
   ins_encode %{
-#ifdef AARCH64
-    __ mov_w($dst$$Register, $src$$constant);
-#else
     __ movw($dst$$Register, $src$$constant);
-#endif
   %}
   ins_pipe(ialu_imm_n);
 %}
@@ -5124,80 +4250,6 @@ instruct loadConP_poll(iRegP dst, immP_poll src) %{
   ins_pipe(loadConP_poll);
 %}
 
-#ifdef AARCH64
-instruct loadConP0(iRegP dst, immP0 src) %{
-  match(Set dst src);
-  ins_cost(DEFAULT_COST);
-  format %{ "MOV    $dst,ZR\t!ptr" %}
-  ins_encode %{
-    __ mov($dst$$Register, ZR);
-  %}
-  ins_pipe(ialu_none);
-%}
-
-instruct loadConN(iRegN dst, immN src) %{
-  match(Set dst src);
-  ins_cost(DEFAULT_COST * 3/2);
-  format %{ "SET    $dst,$src\t! compressed ptr" %}
-  ins_encode %{
-    Register dst = $dst$$Register;
-    // FIXME: use $constanttablebase?
-    __ set_narrow_oop(dst, (jobject)$src$$constant);
-  %}
-  ins_pipe(ialu_hi_lo_reg);
-%}
-
-instruct loadConN0(iRegN dst, immN0 src) %{
-  match(Set dst src);
-  ins_cost(DEFAULT_COST);
-  format %{ "MOV    $dst,ZR\t! compressed ptr" %}
-  ins_encode %{
-    __ mov($dst$$Register, ZR);
-  %}
-  ins_pipe(ialu_none);
-%}
-
-instruct loadConNKlass(iRegN dst, immNKlass src) %{
-  match(Set dst src);
-  ins_cost(DEFAULT_COST * 3/2);
-  format %{ "SET    $dst,$src\t! compressed klass ptr" %}
-  ins_encode %{
-    Register dst = $dst$$Register;
-    // FIXME: use $constanttablebase?
-    __ set_narrow_klass(dst, (Klass*)$src$$constant);
-  %}
-  ins_pipe(ialu_hi_lo_reg);
-%}
-
-instruct load_limmL(iRegL dst, limmL src) %{
-  match(Set dst src);
-  ins_cost(DEFAULT_COST);
-  format %{ "ORR    $dst, ZR, $src\t! long"  %}
-  ins_encode %{
-    __ orr($dst$$Register, ZR, (uintx)$src$$constant);
-  %}
-  ins_pipe(loadConL);
-%}
-instruct load_immLMov(iRegL dst, immLMov src) %{
-  match(Set dst src);
-  ins_cost(DEFAULT_COST);
-  format %{ "MOV    $dst, $src\t! long"  %}
-  ins_encode %{
-    __ mov($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(loadConL);
-%}
-instruct loadConL(iRegL dst, immL src) %{
-  match(Set dst src);
-  ins_cost(DEFAULT_COST * 4); // worst case
-  format %{ "mov_slow   $dst, $src\t! long"  %}
-  ins_encode %{
-    // FIXME: use $constanttablebase?
-    __ mov_slow($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(loadConL);
-%}
-#else
 instruct loadConL(iRegL dst, immL src) %{
   match(Set dst src);
   ins_cost(DEFAULT_COST * 4);
@@ -5223,7 +4275,6 @@ instruct loadConL16( iRegL dst, immL16 src ) %{
   %}
   ins_pipe(ialu_imm);
 %}
-#endif
 
 instruct loadConF_imm8(regF dst, imm8F src) %{
   match(Set dst src);
@@ -5238,25 +4289,6 @@ instruct loadConF_imm8(regF dst, imm8F src) %{
   ins_pipe(loadConFD); // FIXME
 %}
 
-#ifdef AARCH64
-instruct loadIConF(iRegI dst, immF src) %{
-  match(Set dst src);
-  ins_cost(DEFAULT_COST * 2);
-
-  format %{ "MOV_SLOW  $dst, $src\t! loadIConF"  %}
-
-  ins_encode %{
-    // FIXME revisit once 6961697 is in
-    union {
-      jfloat f;
-      int i;
-    } v;
-    v.f = $src$$constant;
-    __ mov_slow($dst$$Register, v.i);
-  %}
-  ins_pipe(ialu_imm);
-%}
-#endif
 
 instruct loadConF(regF dst, immF src, iRegI tmp) %{
   match(Set dst src);
@@ -5324,11 +4356,7 @@ instruct prefetchAlloc_mp( memoryP mem ) %{
 
   format %{ "PLDW $mem\t! Prefetch allocation" %}
   ins_encode %{
-#ifdef AARCH64
-    __ prfm(pstl1keep, $mem$$Address);
-#else
     __ pldw($mem$$Address);
-#endif
   %}
   ins_pipe(iload_mem);
 %}
@@ -5341,11 +4369,7 @@ instruct prefetchAlloc_sp( memoryP mem ) %{
 
   format %{ "PLD $mem\t! Prefetch allocation" %}
   ins_encode %{
-#ifdef AARCH64
-    __ prfm(pstl1keep, $mem$$Address);
-#else
     __ pld($mem$$Address);
-#endif
   %}
   ins_pipe(iload_mem);
 %}
@@ -5378,24 +4402,6 @@ instruct storeCM(memoryB mem, store_RegI src) %{
 
 // Store Char/Short
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct storeCoff(store_RegI src, memoryScaledS mem, aimmX off, iRegP tmp) %{
-  match(Set mem (StoreC (AddP mem off) src));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "STRH    $src,$mem+$off\t! short temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ strh($src$$Register, nmem);
-  %}
-  ins_pipe(istore_mem_reg);
-%}
-#endif
 
 instruct storeC(memoryS mem, store_RegI src) %{
   match(Set mem (StoreC mem src));
@@ -5411,24 +4417,6 @@ instruct storeC(memoryS mem, store_RegI src) %{
 
 // Store Integer
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct storeIoff(store_RegI src, memoryScaledI mem, aimmX off, iRegP tmp) %{
-  match(Set mem (StoreI (AddP mem off) src));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "str_32 $src,$mem+$off\t! int temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ str_32($src$$Register, nmem);
-  %}
-  ins_pipe(istore_mem_reg);
-%}
-#endif
 
 instruct storeI(memoryI mem, store_RegI src) %{
   match(Set mem (StoreI mem src));
@@ -5444,31 +4432,9 @@ instruct storeI(memoryI mem, store_RegI src) %{
 
 // Store Long
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct storeLoff(store_RegLd src, memoryScaledL mem, aimmX off, iRegP tmp) %{
-  match(Set mem (StoreL (AddP mem off) src));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "str_64 $src,$mem+$off\t! long temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ str_64($src$$Register, nmem);
-  %}
-  ins_pipe(istore_mem_reg);
-%}
-#endif
 
 instruct storeL(memoryL mem, store_RegLd src) %{
-#ifdef AARCH64
-  // already atomic for Aarch64
-#else
   predicate(!((StoreLNode*)n)->require_atomic_access());
-#endif
   match(Set mem (StoreL mem src));
   ins_cost(MEMORY_REF_COST);
 
@@ -5481,7 +4447,6 @@ instruct storeL(memoryL mem, store_RegLd src) %{
   ins_pipe(istore_mem_reg);
 %}
 
-#ifndef AARCH64
 instruct storeL_2instr(memorylong mem, iRegL src) %{
   predicate(!((StoreLNode*)n)->require_atomic_access());
   match(Set mem (StoreL mem src));
@@ -5514,9 +4479,7 @@ instruct storeL_volatile(indirect mem, iRegL src) %{
   %}
   ins_pipe(istore_mem_reg);
 %}
-#endif // !AARCH64
 
-#ifndef AARCH64
 instruct storeL_volatile_fp(memoryD mem, iRegL src) %{
   predicate(((StoreLNode*)n)->require_atomic_access());
   match(Set mem (StoreL mem src));
@@ -5530,7 +4493,6 @@ instruct storeL_volatile_fp(memoryD mem, iRegL src) %{
   %}
   ins_pipe(istore_mem_reg);
 %}
-#endif
 
 #ifdef XXX
 // Move SP Pointer
@@ -5552,60 +4514,12 @@ instruct movSP(store_ptr_RegP dst, SPRegP src) %{
 %}
 #endif
 
-#ifdef AARCH64
-// FIXME
-// Store SP Pointer
-instruct storeSP(memoryP mem, SPRegP src, iRegP tmp) %{
-  match(Set mem (StoreP mem src));
-  predicate(_kids[1]->_leaf->is_Proj() && _kids[1]->_leaf->as_Proj()->_con == TypeFunc::FramePtr);
-  // Multiple StoreP rules, different only in register mask.
-  // Matcher makes the last always valid.  The others will
-  // only be valid if they cost less than the last valid
-  // rule.  So cost(rule1) < cost(rule2) < cost(last)
-  // Unlike immediates, register constraints are not checked
-  // at match time.
-  ins_cost(MEMORY_REF_COST+DEFAULT_COST+4);
-  effect(TEMP tmp);
-  size(8);
-
-  format %{ "MOV    $tmp,$src\t! SP ptr\n\t"
-            "STR    $tmp,$mem\t! SP ptr" %}
-  ins_encode %{
-    assert($src$$Register == SP, "SP expected");
-    __ mov($tmp$$Register, $src$$Register);
-    __ str($tmp$$Register, $mem$$Address);
-  %}
-  ins_pipe(istore_mem_spORreg); // FIXME
-%}
-#endif // AARCH64
 
 // Store Pointer
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct storePoff(store_ptr_RegP src, memoryScaledP mem, aimmX off, iRegP tmp) %{
-  predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con != TypeFunc::FramePtr);
-  match(Set mem (StoreP (AddP mem off) src));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "STR    $src,$mem+$off\t! ptr temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ str($src$$Register, nmem);
-  %}
-  ins_pipe(istore_mem_reg);
-%}
-#endif
 
 instruct storeP(memoryP mem, store_ptr_RegP src) %{
   match(Set mem (StoreP mem src));
-#ifdef AARCH64
-  predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con != TypeFunc::FramePtr);
-#endif
   ins_cost(MEMORY_REF_COST);
   size(4);
 
@@ -5616,42 +4530,10 @@ instruct storeP(memoryP mem, store_ptr_RegP src) %{
   ins_pipe(istore_mem_spORreg);
 %}
 
-#ifdef AARCH64
-// Store NULL Pointer
-instruct storeP0(memoryP mem, immP0 src) %{
-  match(Set mem (StoreP mem src));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-
-  format %{ "STR    ZR,$mem\t! ptr" %}
-  ins_encode %{
-    __ str(ZR, $mem$$Address);
-  %}
-  ins_pipe(istore_mem_spORreg);
-%}
-#endif // AARCH64
 
 #ifdef _LP64
 // Store Compressed Pointer
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct storeNoff(store_RegN src, memoryScaledI mem, aimmX off, iRegP tmp) %{
-  match(Set mem (StoreN (AddP mem off) src));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "str_32 $src,$mem+$off\t! compressed ptr temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ str_32($src$$Register, nmem);
-  %}
-  ins_pipe(istore_mem_reg);
-%}
-#endif
 
 instruct storeN(memoryI mem, store_RegN src) %{
   match(Set mem (StoreN mem src));
@@ -5665,20 +4547,6 @@ instruct storeN(memoryI mem, store_RegN src) %{
   ins_pipe(istore_mem_reg);
 %}
 
-#ifdef AARCH64
-// Store NULL Pointer
-instruct storeN0(memoryI mem, immN0 src) %{
-  match(Set mem (StoreN mem src));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-
-  format %{ "str_32 ZR,$mem\t! compressed ptr" %}
-  ins_encode %{
-    __ str_32(ZR, $mem$$Address);
-  %}
-  ins_pipe(istore_mem_reg);
-%}
-#endif
 
 // Store Compressed Klass Pointer
 instruct storeNKlass(memoryI mem, store_RegN src) %{
@@ -5696,24 +4564,6 @@ instruct storeNKlass(memoryI mem, store_RegN src) %{
 
 // Store Double
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct storeDoff(regD src, memoryScaledD mem, aimmX off, iRegP tmp) %{
-  match(Set mem (StoreD (AddP mem off) src));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
-
-  format %{ "STR    $src,$mem+$off\t! double temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ str_d($src$$FloatRegister, nmem);
-  %}
-  ins_pipe(fstoreD_mem_reg);
-%}
-#endif
 
 instruct storeD(memoryD mem, regD src) %{
   match(Set mem (StoreD mem src));
@@ -5729,132 +4579,26 @@ instruct storeD(memoryD mem, regD src) %{
   ins_pipe(fstoreD_mem_reg);
 %}
 
-#ifdef AARCH64
-instruct movI2F(regF dst, iRegI src) %{
-  match(Set dst src);
-  size(4);
 
-  format %{ "FMOV_sw $dst,$src\t! movI2F" %}
-  ins_encode %{
-    __ fmov_sw($dst$$FloatRegister, $src$$Register);
-  %}
-  ins_pipe(ialu_reg); // FIXME
-%}
+// Store Float
 
-instruct movF2I(iRegI dst, regF src) %{
-  match(Set dst src);
-  size(4);
 
-  format %{ "FMOV_ws $dst,$src\t! movF2I" %}
+instruct storeF( memoryF mem, regF src) %{
+  match(Set mem (StoreF mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "FSTS    $src,$mem" %}
   ins_encode %{
-    __ fmov_ws($dst$$Register, $src$$FloatRegister);
+    __ str_float($src$$FloatRegister, $mem$$Address);
   %}
-  ins_pipe(ialu_reg); // FIXME
+  ins_pipe(fstoreF_mem_reg);
 %}
-#endif
 
-// Store Float
 
-#ifdef AARCH64
-// XXX This variant shouldn't be necessary if 6217251 is implemented
-instruct storeFoff(regF src, memoryScaledF mem, aimmX off, iRegP tmp) %{
-  match(Set mem (StoreF (AddP mem off) src));
-  ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free
-  effect(TEMP tmp);
-  size(4 * 2);
+//----------MemBar Instructions-----------------------------------------------
+// Memory barrier flavors
 
-  format %{ "str_s  $src,$mem+$off\t! float temp=$tmp" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    __ add($tmp$$Register, base, $off$$constant);
-    Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    __ str_s($src$$FloatRegister, nmem);
-  %}
-  ins_pipe(fstoreF_mem_reg);
-%}
-#endif
-
-instruct storeF( memoryF mem, regF src) %{
-  match(Set mem (StoreF mem src));
-  ins_cost(MEMORY_REF_COST);
-
-  size(4);
-  format %{ "FSTS    $src,$mem" %}
-  ins_encode %{
-    __ str_float($src$$FloatRegister, $mem$$Address);
-  %}
-  ins_pipe(fstoreF_mem_reg);
-%}
-
-#ifdef AARCH64
-// Convert oop pointer into compressed form
-instruct encodeHeapOop(iRegN dst, iRegP src, flagsReg ccr) %{
-  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
-  match(Set dst (EncodeP src));
-  effect(KILL ccr);
-  format %{ "encode_heap_oop $dst, $src" %}
-  ins_encode %{
-    __ encode_heap_oop($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct encodeHeapOop_not_null(iRegN dst, iRegP src) %{
-  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
-  match(Set dst (EncodeP src));
-  format %{ "encode_heap_oop_not_null $dst, $src" %}
-  ins_encode %{
-    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct decodeHeapOop(iRegP dst, iRegN src, flagsReg ccr) %{
-  predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
-            n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
-  match(Set dst (DecodeN src));
-  effect(KILL ccr);
-  format %{ "decode_heap_oop $dst, $src" %}
-  ins_encode %{
-    __ decode_heap_oop($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct decodeHeapOop_not_null(iRegP dst, iRegN src) %{
-  predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
-            n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
-  match(Set dst (DecodeN src));
-  format %{ "decode_heap_oop_not_null $dst, $src" %}
-  ins_encode %{
-    __ decode_heap_oop_not_null($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct encodeKlass_not_null(iRegN dst, iRegP src) %{
-  match(Set dst (EncodePKlass src));
-  format %{ "encode_klass_not_null $dst, $src" %}
-  ins_encode %{
-    __ encode_klass_not_null($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct decodeKlass_not_null(iRegP dst, iRegN src) %{
-  match(Set dst (DecodeNKlass src));
-  format %{ "decode_klass_not_null $dst, $src" %}
-  ins_encode %{
-    __ decode_klass_not_null($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-#endif // AARCH64
-
-//----------MemBar Instructions-----------------------------------------------
-// Memory barrier flavors
-
-// TODO: take advantage of Aarch64 load-acquire, store-release, etc
 // pattern-match out unnecessary membars
 instruct membar_storestore() %{
   match(MemBarStoreStore);
@@ -5950,53 +4694,6 @@ instruct unnecessary_membar_volatile() %{
 // %}
 
 
-#ifdef AARCH64
-// 0 constant in register
-instruct zrImmI0(ZRRegI dst, immI0 imm) %{
-  match(Set dst imm);
-  size(0);
-  ins_cost(0);
-
-  format %{ "! ZR (int 0)" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe(ialu_none);
-%}
-
-// 0 constant in register
-instruct zrImmL0(ZRRegL dst, immL0 imm) %{
-  match(Set dst imm);
-  size(0);
-  ins_cost(0);
-
-  format %{ "! ZR (long 0)" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe(ialu_none);
-%}
-
-#ifdef XXX
-// 0 constant in register
-instruct zrImmN0(ZRRegN dst, immN0 imm) %{
-  match(Set dst imm);
-  size(0);
-  ins_cost(0);
-
-  format %{ "! ZR (compressed pointer NULL)" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe(ialu_none);
-%}
-
-// 0 constant in register
-instruct zrImmP0(ZRRegP dst, immP0 imm) %{
-  match(Set dst imm);
-  size(0);
-  ins_cost(0);
-
-  format %{ "! ZR (NULL)" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe(ialu_none);
-%}
-#endif
-#endif // AARCH64
 
 // Cast Index to Pointer for unsafe natives
 instruct castX2P(iRegX src, iRegP dst) %{
@@ -6024,7 +4721,6 @@ instruct castP2X(iRegP src, iRegX dst) %{
   ins_pipe(ialu_reg);
 %}
 
-#ifndef AARCH64
 //----------Conditional Move---------------------------------------------------
 // Conditional move
 instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{
@@ -6037,187 +4733,8 @@ instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{
   %}
   ins_pipe(ialu_reg);
 %}
-#endif
-
-#ifdef AARCH64
-instruct cmovI_reg3(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src1, iRegI src2) %{
-  match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovL_reg3(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src1, iRegL src2) %{
-  match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovP_reg3(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src1, iRegP src2) %{
-  match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovN_reg3(cmpOp cmp, flagsReg icc, iRegN dst, iRegN src1, iRegN src2) %{
-  match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovIP_reg3(cmpOpP cmp, flagsRegP icc, iRegI dst, iRegI src1, iRegI src2) %{
-  match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovLP_reg3(cmpOpP cmp, flagsRegP icc, iRegL dst, iRegL src1, iRegL src2) %{
-  match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovPP_reg3(cmpOpP cmp, flagsRegP icc, iRegP dst, iRegP src1, iRegP src2) %{
-  match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovNP_reg3(cmpOpP cmp, flagsRegP icc, iRegN dst, iRegN src1, iRegN src2) %{
-  match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovIU_reg3(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src1, iRegI src2) %{
-  match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovLU_reg3(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src1, iRegL src2) %{
-  match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovPU_reg3(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src1, iRegP src2) %{
-  match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovNU_reg3(cmpOpU cmp, flagsRegU icc, iRegN dst, iRegN src1, iRegN src2) %{
-  match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovIZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src1, iRegI src2) %{
-  match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
 
-instruct cmovLZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, iRegL src1, iRegL src2) %{
-  match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovPZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, iRegP src1, iRegP src2) %{
-  match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct cmovNZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegN dst, iRegN src1, iRegN src2) %{
-  match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %}
-  ins_encode %{
-    __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-#endif // AARCH64
 
-#ifndef AARCH64
 instruct cmovIP_immMov(cmpOpP cmp, flagsRegP pcc, iRegI dst, immIMov src) %{
   match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
   ins_cost(140);
@@ -6239,7 +4756,6 @@ instruct cmovIP_imm16(cmpOpP cmp, flagsRegP pcc, iRegI dst, immI16 src) %{
   %}
   ins_pipe(ialu_imm);
 %}
-#endif
 
 instruct cmovI_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{
   match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
@@ -6252,20 +4768,7 @@ instruct cmovI_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{
   ins_pipe(ialu_reg);
 %}
 
-#ifdef AARCH64
-instruct cmovL_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{
-  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
-  ins_cost(150);
-  size(4);
-  format %{ "MOV$cmp  $dst,$src\t! long" %}
-  ins_encode %{
-    __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(ialu_reg);
-%}
-#endif
 
-#ifndef AARCH64
 instruct cmovI_immMov(cmpOp cmp, flagsReg icc, iRegI dst, immIMov src) %{
   match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
   ins_cost(140);
@@ -6287,7 +4790,6 @@ instruct cmovII_imm16(cmpOp cmp, flagsReg icc, iRegI dst, immI16 src) %{
   %}
   ins_pipe(ialu_imm);
 %}
-#endif
 
 instruct cmovII_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src) %{
   match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
@@ -6304,7 +4806,6 @@ instruct cmovII_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI
   ins_pipe(ialu_reg);
 %}
 
-#ifndef AARCH64
 instruct cmovII_immMov_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immIMov src) %{
   match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
   predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
@@ -6334,7 +4835,6 @@ instruct cmovII_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, imm
   %}
   ins_pipe(ialu_imm);
 %}
-#endif
 
 instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{
   match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
@@ -6347,7 +4847,6 @@ instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{
   ins_pipe(ialu_reg);
 %}
 
-#ifndef AARCH64
 instruct cmovIIu_immMov(cmpOpU cmp, flagsRegU icc, iRegI dst, immIMov src) %{
   match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
   ins_cost(140);
@@ -6369,7 +4868,6 @@ instruct cmovIIu_imm16(cmpOpU cmp, flagsRegU icc, iRegI dst, immI16 src) %{
   %}
   ins_pipe(ialu_imm);
 %}
-#endif
 
 // Conditional move
 instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{
@@ -6387,17 +4885,9 @@ instruct cmovPP_imm(cmpOpP cmp, flagsRegP pcc, iRegP dst, immP0 src) %{
   match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
   ins_cost(140);
   size(4);
-#ifdef AARCH64
-  format %{ "MOV$cmp  $dst,ZR" %}
-#else
   format %{ "MOV$cmp  $dst,$src" %}
-#endif
   ins_encode %{
-#ifdef AARCH64
-    __ mov($dst$$Register,             ZR, (AsmCondition)($cmp$$cmpcode));
-#else
     __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
-#endif
   %}
   ins_pipe(ialu_imm);
 %}
@@ -6448,17 +4938,9 @@ instruct cmovPI_imm(cmpOp cmp, flagsReg icc, iRegP dst, immP0 src) %{
   ins_cost(140);
 
   size(4);
-#ifdef AARCH64
-  format %{ "MOV$cmp  $dst,ZR\t! ptr" %}
-#else
   format %{ "MOV$cmp  $dst,$src\t! ptr" %}
-#endif
   ins_encode %{
-#ifdef AARCH64
-    __ mov($dst$$Register,             ZR, (AsmCondition)($cmp$$cmpcode));
-#else
     __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
-#endif
   %}
   ins_pipe(ialu_imm);
 %}
@@ -6472,17 +4954,9 @@ instruct cmovPI_imm_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, immP0
   ins_cost(140);
 
   size(4);
-#ifdef AARCH64
-  format %{ "MOV$cmp  $dst,ZR\t! ptr" %}
-#else
   format %{ "MOV$cmp  $dst,$src\t! ptr" %}
-#endif
   ins_encode %{
-#ifdef AARCH64
-    __ mov($dst$$Register,             ZR, (AsmCondition)($cmp$$cmpcode));
-#else
     __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
-#endif
   %}
   ins_pipe(ialu_imm);
 %}
@@ -6492,175 +4966,76 @@ instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{
   ins_cost(140);
 
   size(4);
-#ifdef AARCH64
-  format %{ "MOV$cmp  $dst,ZR\t! ptr" %}
-#else
   format %{ "MOV$cmp  $dst,$src\t! ptr" %}
-#endif
   ins_encode %{
-#ifdef AARCH64
-    __ mov($dst$$Register,             ZR, (AsmCondition)($cmp$$cmpcode));
-#else
     __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode));
-#endif
   %}
   ins_pipe(ialu_imm);
 %}
 
-#ifdef AARCH64
+
 // Conditional move
-instruct cmovF_reg(cmpOp cmp, flagsReg icc, regF dst, regF src1, regF src2) %{
-  match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1)));
+instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp pcc) (Binary dst src)));
   ins_cost(150);
   size(4);
-  format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %}
+  format %{ "FCPYS$cmp $dst,$src" %}
   ins_encode %{
-    __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+    __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
   %}
   ins_pipe(int_conditional_float_move);
 %}
 
-instruct cmovD_reg(cmpOp cmp, flagsReg icc, regD dst, regD src1, regD src2) %{
-  match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1)));
+instruct cmovFI_reg(cmpOp cmp, flagsReg icc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
   ins_cost(150);
+
   size(4);
-  format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %}
+  format %{ "FCPYS$cmp $dst,$src" %}
   ins_encode %{
-    __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+    __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
   %}
   ins_pipe(int_conditional_float_move);
 %}
 
-instruct cmovFP_reg(cmpOpP cmp, flagsRegP icc, regF dst, regF src1, regF src2) %{
-  match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1)));
+instruct cmovFI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
+  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
+            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
   ins_cost(150);
+
   size(4);
-  format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %}
+  format %{ "FCPYS$cmp $dst,$src" %}
   ins_encode %{
-    __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+    __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
   %}
   ins_pipe(int_conditional_float_move);
 %}
 
-instruct cmovDP_reg(cmpOpP cmp, flagsRegP icc, regD dst, regD src1, regD src2) %{
-  match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1)));
+instruct cmovFIu_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src) %{
+  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
   ins_cost(150);
+
   size(4);
-  format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %}
+  format %{ "FCPYS$cmp $dst,$src" %}
   ins_encode %{
-    __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+    __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
   %}
   ins_pipe(int_conditional_float_move);
 %}
 
-instruct cmovFU_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src1, regF src2) %{
-  match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1)));
+// Conditional move
+instruct cmovDP_reg(cmpOpP cmp, flagsRegP pcc, regD dst, regD src) %{
+  match(Set dst (CMoveD (Binary cmp pcc) (Binary dst src)));
   ins_cost(150);
   size(4);
-  format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %}
+  format %{ "FCPYD$cmp $dst,$src" %}
   ins_encode %{
-    __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
+    __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
   %}
-  ins_pipe(int_conditional_float_move);
-%}
-
-instruct cmovDU_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src1, regD src2) %{
-  match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %}
-  ins_encode %{
-    __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(int_conditional_float_move);
-%}
-
-instruct cmovFZ_reg(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src1, regF src2) %{
-  match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %}
-  ins_encode %{
-    __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(int_conditional_float_move);
-%}
-
-instruct cmovDZ_reg(cmpOp0 cmp, flagsReg_EQNELTGE icc, regD dst, regD src1, regD src2) %{
-  match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1)));
-  ins_cost(150);
-  size(4);
-  format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %}
-  ins_encode %{
-    __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(int_conditional_float_move);
-%}
-
-#else // !AARCH64
-
-// Conditional move
-instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{
-  match(Set dst (CMoveF (Binary cmp pcc) (Binary dst src)));
-  ins_cost(150);
-  size(4);
-  format %{ "FCPYS$cmp $dst,$src" %}
-  ins_encode %{
-    __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(int_conditional_float_move);
-%}
-
-instruct cmovFI_reg(cmpOp cmp, flagsReg icc, regF dst, regF src) %{
-  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
-  ins_cost(150);
-
-  size(4);
-  format %{ "FCPYS$cmp $dst,$src" %}
-  ins_encode %{
-    __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(int_conditional_float_move);
-%}
-
-instruct cmovFI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src) %{
-  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
-  predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
-            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ||
-            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
-            _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
-  ins_cost(150);
-
-  size(4);
-  format %{ "FCPYS$cmp $dst,$src" %}
-  ins_encode %{
-    __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(int_conditional_float_move);
-%}
-
-instruct cmovFIu_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src) %{
-  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
-  ins_cost(150);
-
-  size(4);
-  format %{ "FCPYS$cmp $dst,$src" %}
-  ins_encode %{
-    __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(int_conditional_float_move);
-%}
-
-// Conditional move
-instruct cmovDP_reg(cmpOpP cmp, flagsRegP pcc, regD dst, regD src) %{
-  match(Set dst (CMoveD (Binary cmp pcc) (Binary dst src)));
-  ins_cost(150);
-  size(4);
-  format %{ "FCPYD$cmp $dst,$src" %}
-  ins_encode %{
-    __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode));
-  %}
-  ins_pipe(int_conditional_double_move);
+  ins_pipe(int_conditional_double_move);
 %}
 
 instruct cmovDI_reg(cmpOp cmp, flagsReg icc, regD dst, regD src) %{
@@ -6858,7 +5233,6 @@ instruct cmovLIu_reg(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src) %{
   %}
   ins_pipe(ialu_reg);
 %}
-#endif // !AARCH64
 
 
 //----------OS and Locking Instructions----------------------------------------
@@ -6915,7 +5289,6 @@ instruct addI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct addshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (AddI (LShiftI src1 src2) src3));
 
@@ -6926,22 +5299,7 @@ instruct addshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
-#ifdef AARCH64
-#ifdef TODO
-instruct addshlL_reg_imm_reg(iRegL dst, iRegL src1, immU6 src2, iRegL src3) %{
-  match(Set dst (AddL (LShiftL src1 src2) src3));
-
-  size(4);
-  format %{ "ADD    $dst,$src3,$src1<<$src2\t! long" %}
-  ins_encode %{
-    __ add($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$constant));
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-#endif
-#endif
 
 instruct addshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
   match(Set dst (AddI (LShiftI src1 src2) src3));
@@ -6954,7 +5312,6 @@ instruct addshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct addsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (AddI (RShiftI src1 src2) src3));
 
@@ -6965,7 +5322,6 @@ instruct addsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct addsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
   match(Set dst (AddI (RShiftI src1 src2) src3));
@@ -6978,7 +5334,6 @@ instruct addsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct addshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (AddI (URShiftI src1 src2) src3));
 
@@ -6989,7 +5344,6 @@ instruct addshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct addshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
   match(Set dst (AddI (URShiftI src1 src2) src3));
@@ -7026,69 +5380,6 @@ instruct addP_reg_reg(iRegP dst, iRegP src1, iRegX src2) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifdef AARCH64
-// unshifted I2L operand
-operand unshiftedI2L(iRegI src2) %{
-//constraint(ALLOC_IN_RC(sp_ptr_reg));
-  match(ConvI2L src2);
-
-  op_cost(1);
-  format %{ "$src2.w" %}
-  interface(MEMORY_INTER) %{
-    base($src2);
-    index(0xff);
-    scale(0x0);
-    disp(0x0);
-  %}
-%}
-
-// shifted I2L operand
-operand shiftedI2L(iRegI src2, immI_0_4 src3) %{
-//constraint(ALLOC_IN_RC(sp_ptr_reg));
-  match(LShiftX (ConvI2L src2) src3);
-
-  op_cost(1);
-  format %{ "$src2.w << $src3" %}
-  interface(MEMORY_INTER) %{
-    base($src2);
-    index(0xff);
-    scale($src3);
-    disp(0x0);
-  %}
-%}
-
-opclass shiftedRegI(shiftedI2L, unshiftedI2L);
-
-instruct shlL_reg_regI(iRegL dst, iRegI src1, immU6 src2) %{
-  match(Set dst (LShiftL (ConvI2L src1) src2));
-
-  size(4);
-  format %{ "LSL    $dst,$src1.w,$src2\t! ptr" %}
-  ins_encode %{
-    int c = $src2$$constant;
-    int r = 64 - c;
-    int s = 31;
-    if (s >= r) {
-      s = r - 1;
-    }
-    __ sbfm($dst$$Register, $src1$$Register, r, s);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addP_reg_regI(iRegP dst, iRegP src1, shiftedRegI src2) %{
-  match(Set dst (AddP src1 src2));
-
-  ins_cost(DEFAULT_COST * 3/2);
-  size(4);
-  format %{ "ADD    $dst,$src1,$src2, sxtw\t! ptr" %}
-  ins_encode %{
-    Register base = reg_to_register_object($src2$$base);
-    __ add($dst$$Register, $src1$$Register, base, ex_sxtw, $src2$$scale);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-#endif
 
 // shifted iRegX operand
 operand shiftedX(iRegX src2, shimmX src3) %{
@@ -7131,30 +5422,6 @@ instruct addP_reg_aimmX(iRegP dst, iRegP src1, aimmX src2) %{
 %}
 
 // Long Addition
-#ifdef AARCH64
-instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
-  match(Set dst (AddL src1 src2));
-  size(4);
-  format %{ "ADD     $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ add($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addL_reg_regI(iRegL dst, iRegL src1, shiftedRegI src2) %{
-  match(Set dst (AddL src1 src2));
-
-  ins_cost(DEFAULT_COST * 3/2);
-  size(4);
-  format %{ "ADD    $dst,$src1,$src2, sxtw\t! long" %}
-  ins_encode %{
-    Register base = reg_to_register_object($src2$$base);
-    __ add($dst$$Register, $src1$$Register, base, ex_sxtw, $src2$$scale);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-#else
 instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg ccr) %{
   match(Set dst (AddL src1 src2));
   effect(KILL ccr);
@@ -7167,36 +5434,9 @@ instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg ccr) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
-#ifdef AARCH64
-// Immediate Addition
-instruct addL_reg_aimm(iRegL dst, iRegL src1, aimmL src2) %{
-  match(Set dst (AddL src1 src2));
-
-  size(4);
-  format %{ "ADD    $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ add($dst$$Register, $src1$$Register, $src2$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
-%}
-
-instruct addL_reg_immLneg(iRegL dst, iRegL src1, aimmLneg src2) %{
-  match(Set dst (SubL src1 src2));
-
-  size(4);
-  format %{ "ADD    $dst,$src1,-($src2)\t! long" %}
-  ins_encode %{
-    __ add($dst$$Register, $src1$$Register, -$src2$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
-%}
-#else
 // TODO
-#endif
 
-#ifndef AARCH64
 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
 // (hi($con$$constant), lo($con$$constant)) becomes
 instruct addL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg ccr) %{
@@ -7211,26 +5451,19 @@ instruct addL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg ccr) %{
   %}
   ins_pipe(ialu_reg_imm);
 %}
-#endif
 
 //----------Conditional_store--------------------------------------------------
 // Conditional-store of the updated heap-top.
 // Used during allocation of the shared heap.
 // Sets flags (EQ) on success.
 
-// TODO: optimize out barriers with AArch64 load-acquire/store-release
 // LoadP-locked.
 instruct loadPLocked(iRegP dst, memoryex mem) %{
   match(Set dst (LoadPLocked mem));
   size(4);
   format %{ "LDREX  $dst,$mem" %}
   ins_encode %{
-#ifdef AARCH64
-    Register base = reg_to_register_object($mem$$base);
-    __ ldxr($dst$$Register, base);
-#else
     __ ldrex($dst$$Register,$mem$$Address);
-#endif
   %}
   ins_pipe(iload_mem);
 %}
@@ -7243,12 +5476,7 @@ instruct storePConditional( memoryex heap_top_ptr, iRegP oldval, iRegP newval, i
   format %{ "STREX  $tmp,$newval,$heap_top_ptr\n\t"
             "CMP    $tmp, 0" %}
   ins_encode %{
-#ifdef AARCH64
-    Register base = reg_to_register_object($heap_top_ptr$$base);
-    __ stxr($tmp$$Register, $newval$$Register, base);
-#else
     __ strex($tmp$$Register, $newval$$Register, $heap_top_ptr$$Address);
-#endif
     __ cmp($tmp$$Register, 0);
   %}
   ins_pipe( long_memory_op );
@@ -7256,20 +5484,6 @@ instruct storePConditional( memoryex heap_top_ptr, iRegP oldval, iRegP newval, i
 
 // Conditional-store of an intx value.
 instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp, flagsReg icc ) %{
-#ifdef AARCH64
-  match(Set icc (StoreLConditional mem (Binary oldval newval)));
-  effect( TEMP tmp );
-  size(28);
-  format %{ "loop:\n\t"
-            "LDXR     $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem], DOESN'T set $newval=[$mem] in any case\n\t"
-            "SUBS     $tmp, $tmp, $oldval\n\t"
-            "B.ne     done\n\t"
-            "STXR     $tmp, $newval, $mem\n\t"
-            "CBNZ_w   $tmp, loop\n\t"
-            "CMP      $tmp, 0\n\t"
-            "done:\n\t"
-            "membar   LoadStore|LoadLoad" %}
-#else
   match(Set icc (StoreIConditional mem (Binary oldval newval)));
   effect( TEMP tmp );
   size(28);
@@ -7281,29 +5495,15 @@ instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp,
             "B.eq     loop \n\t"
             "TEQ      $tmp, 0\n\t"
             "membar   LoadStore|LoadLoad" %}
-#endif
   ins_encode %{
     Label loop;
     __ bind(loop);
-#ifdef AARCH64
-// FIXME: use load-acquire/store-release, remove membar?
-    Label done;
-    Register base = reg_to_register_object($mem$$base);
-    __ ldxr($tmp$$Register, base);
-    __ subs($tmp$$Register, $tmp$$Register, $oldval$$Register);
-    __ b(done, ne);
-    __ stxr($tmp$$Register, $newval$$Register, base);
-    __ cbnz_w($tmp$$Register, loop);
-    __ cmp($tmp$$Register, 0);
-    __ bind(done);
-#else
     __ ldrex($tmp$$Register, $mem$$Address);
     __ eors($tmp$$Register, $tmp$$Register, $oldval$$Register);
     __ strex($tmp$$Register, $newval$$Register, $mem$$Address, eq);
     __ cmp($tmp$$Register, 1, eq);
     __ b(loop, eq);
     __ teq($tmp$$Register, 0);
-#endif
     // used by biased locking only. Requires a membar.
     __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadStore | MacroAssembler::LoadLoad), noreg);
   %}
@@ -7312,118 +5512,6 @@ instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp,
 
 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 
-#ifdef AARCH64
-// TODO: if combined with membar, elide membar and use
-// load-acquire/store-release if appropriate
-instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegL newval, iRegI res, iRegI tmp, flagsReg ccr) %{
-  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
-  effect( KILL ccr, TEMP tmp);
-  size(24);
-  format %{ "loop:\n\t"
-            "LDXR     $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
-            "CMP      $tmp, $oldval\n\t"
-            "B.ne     done\n\t"
-            "STXR     $tmp, $newval, $mem\n\t"
-            "CBNZ_w   $tmp, loop\n\t"
-            "done:\n\t"
-            "CSET_w   $res, eq" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    Label loop, done;
-    __ bind(loop);
-    __ ldxr($tmp$$Register, base);
-    __ cmp($tmp$$Register, $oldval$$Register);
-    __ b(done, ne);
-    __ stxr($tmp$$Register, $newval$$Register, base);
-    __ cbnz_w($tmp$$Register, loop);
-    __ bind(done);
-    __ cset_w($res$$Register, eq);
-  %}
-  ins_pipe( long_memory_op );
-%}
-
-instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{
-  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
-  effect( KILL ccr, TEMP tmp);
-  size(24);
-  format %{ "loop:\n\t"
-            "LDXR_w   $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
-            "CMP_w    $tmp, $oldval\n\t"
-            "B.ne     done\n\t"
-            "STXR_w   $tmp, $newval, $mem\n\t"
-            "CBNZ_w   $tmp, loop\n\t"
-            "done:\n\t"
-            "CSET_w   $res, eq" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    Label loop, done;
-    __ bind(loop);
-    __ ldxr_w($tmp$$Register, base);
-    __ cmp_w($tmp$$Register, $oldval$$Register);
-    __ b(done, ne);
-    __ stxr_w($tmp$$Register, $newval$$Register,  base);
-    __ cbnz_w($tmp$$Register, loop);
-    __ bind(done);
-    __ cset_w($res$$Register, eq);
-  %}
-  ins_pipe( long_memory_op );
-%}
-
-// tmp must use iRegI instead of iRegN until 8051805 is fixed.
-instruct compareAndSwapN_bool(memoryex mem, iRegN oldval, iRegN newval, iRegI res, iRegI tmp, flagsReg ccr) %{
-  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
-  effect( KILL ccr, TEMP tmp);
-  size(24);
-  format %{ "loop:\n\t"
-            "LDXR_w   $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
-            "CMP_w    $tmp, $oldval\n\t"
-            "B.ne     done\n\t"
-            "STXR_w   $tmp, $newval, $mem\n\t"
-            "CBNZ_w   $tmp, loop\n\t"
-            "done:\n\t"
-            "CSET_w   $res, eq" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    Label loop, done;
-    __ bind(loop);
-    __ ldxr_w($tmp$$Register, base);
-    __ cmp_w($tmp$$Register, $oldval$$Register);
-    __ b(done, ne);
-    __ stxr_w($tmp$$Register, $newval$$Register,  base);
-    __ cbnz_w($tmp$$Register, loop);
-    __ bind(done);
-    __ cset_w($res$$Register, eq);
-  %}
-  ins_pipe( long_memory_op );
-%}
-
-instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr) %{
-  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
-  effect( KILL ccr, TEMP tmp);
-  size(24);
-  format %{ "loop:\n\t"
-            "LDXR     $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
-            "CMP      $tmp, $oldval\n\t"
-            "B.ne     done\n\t"
-            "STXR     $tmp, $newval, $mem\n\t"
-            "CBNZ_w   $tmp, loop\n\t"
-            "done:\n\t"
-            "CSET_w   $res, eq" %}
-  ins_encode %{
-    Register base = reg_to_register_object($mem$$base);
-    Label loop, done;
-    __ bind(loop);
-    __ ldxr($tmp$$Register, base);
-    __ cmp($tmp$$Register, $oldval$$Register);
-    __ b(done, ne);
-    __ stxr($tmp$$Register, $newval$$Register,  base);
-    __ cbnz_w($tmp$$Register, loop);
-    __ bind(done);
-    __ cset_w($res$$Register, eq);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else // !AARCH64
 instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegLd newval, iRegI res, iRegLd tmp, flagsReg ccr ) %{
   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
   effect( KILL ccr, TEMP tmp);
@@ -7506,32 +5594,7 @@ instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI re
   %}
   ins_pipe( long_memory_op );
 %}
-#endif // !AARCH64
 
-#ifdef AARCH64
-instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2) %{
-  predicate(n->as_LoadStore()->result_not_used());
-  match(Set dummy (GetAndAddI mem add));
-  effect(TEMP tmp1, TEMP tmp2);
-  size(16);
-  format %{ "loop:\n\t"
-            "LDXR_w   $tmp1, $mem\n\t"
-            "ADD_w    $tmp1, $tmp1, $add\n\t"
-            "STXR_w   $tmp2, $tmp1, $mem\n\t"
-            "CBNZ_w   $tmp2, loop" %}
-
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr_w($tmp1$$Register, base);
-    __ add_w($tmp1$$Register, $tmp1$$Register, $add$$constant);
-    __ stxr_w($tmp2$$Register, $tmp1$$Register, base);
-    __ cbnz_w($tmp2$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else
 instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
   predicate(n->as_LoadStore()->result_not_used());
   match(Set dummy (GetAndAddI mem add));
@@ -7555,32 +5618,7 @@ instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1,
   %}
   ins_pipe( long_memory_op );
 %}
-#endif
-
-#ifdef AARCH64
-instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2) %{
-  predicate(n->as_LoadStore()->result_not_used());
-  match(Set dummy (GetAndAddI mem add));
-  effect(TEMP tmp1, TEMP tmp2);
-  size(16);
-  format %{ "loop:\n\t"
-            "LDXR_w   $tmp1, $mem\n\t"
-            "ADD_w    $tmp1, $tmp1, $add\n\t"
-            "STXR_w   $tmp2, $tmp1, $mem\n\t"
-            "CBNZ_w   $tmp2, loop" %}
 
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr_w($tmp1$$Register, base);
-    __ add_w($tmp1$$Register, $tmp1$$Register, $add$$Register);
-    __ stxr_w($tmp2$$Register, $tmp1$$Register, base);
-    __ cbnz_w($tmp2$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else
 instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
   predicate(n->as_LoadStore()->result_not_used());
   match(Set dummy (GetAndAddI mem add));
@@ -7604,31 +5642,7 @@ instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, i
   %}
   ins_pipe( long_memory_op );
 %}
-#endif
-
-#ifdef AARCH64
-instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2) %{
-  match(Set res (GetAndAddI mem add));
-  effect(TEMP tmp1, TEMP tmp2, TEMP res);
-  size(16);
-  format %{ "loop:\n\t"
-            "LDXR_w   $res, $mem\n\t"
-            "ADD_w    $tmp1, $res, $add\n\t"
-            "STXR_w   $tmp2, $tmp1, $mem\n\t"
-            "CBNZ_w   $tmp2, loop" %}
 
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr_w($res$$Register, base);
-    __ add_w($tmp1$$Register, $res$$Register, $add$$constant);
-    __ stxr_w($tmp2$$Register, $tmp1$$Register, base);
-    __ cbnz_w($tmp2$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else
 instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
   match(Set res (GetAndAddI mem add));
   effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
@@ -7651,31 +5665,7 @@ instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2,
   %}
   ins_pipe( long_memory_op );
 %}
-#endif
-
-#ifdef AARCH64
-instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2) %{
-  match(Set res (GetAndAddI mem add));
-  effect(TEMP tmp1, TEMP tmp2, TEMP res);
-  size(16);
-  format %{ "loop:\n\t"
-            "LDXR_w   $res, $mem\n\t"
-            "ADD_w    $tmp1, $res, $add\n\t"
-            "STXR_w   $tmp2, $tmp1, $mem\n\t"
-            "CBNZ_w   $tmp2, loop" %}
 
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr_w($res$$Register, base);
-    __ add_w($tmp1$$Register, $res$$Register, $add$$Register);
-    __ stxr_w($tmp2$$Register, $tmp1$$Register, base);
-    __ cbnz_w($tmp2$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else
 instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{
   match(Set res (GetAndAddI mem add));
   effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
@@ -7698,32 +5688,7 @@ instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2, f
   %}
   ins_pipe( long_memory_op );
 %}
-#endif
 
-#ifdef AARCH64
-instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegL tmp1, iRegI tmp2) %{
-  predicate(n->as_LoadStore()->result_not_used());
-  match(Set dummy (GetAndAddL mem add));
-  effect(TEMP tmp1, TEMP tmp2);
-  size(16);
-  format %{ "loop:\n\t"
-            "LDXR     $tmp1, $mem\n\t"
-            "ADD      $tmp1, $tmp1, $add\n\t"
-            "STXR     $tmp2, $tmp1, $mem\n\t"
-            "CBNZ_w   $tmp2, loop" %}
-
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr($tmp1$$Register, base);
-    __ add($tmp1$$Register, $tmp1$$Register, $add$$Register);
-    __ stxr($tmp2$$Register, $tmp1$$Register, base);
-    __ cbnz_w($tmp2$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else
 instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
   predicate(n->as_LoadStore()->result_not_used());
   match(Set dummy (GetAndAddL mem add));
@@ -7749,32 +5714,7 @@ instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegLd tmp1,
   %}
   ins_pipe( long_memory_op );
 %}
-#endif
-
-#ifdef AARCH64
-instruct xaddL_imm_no_res(memoryex mem, aimmL add, Universe dummy, iRegL tmp1, iRegI tmp2) %{
-  predicate(n->as_LoadStore()->result_not_used());
-  match(Set dummy (GetAndAddL mem add));
-  effect(TEMP tmp1, TEMP tmp2);
-  size(16);
-  format %{ "loop:\n\t"
-            "LDXR     $tmp1, $mem\n\t"
-            "ADD      $tmp1, $tmp1, $add\n\t"
-            "STXR     $tmp2, $tmp1, $mem\n\t"
-            "CBNZ_w   $tmp2, loop" %}
 
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr($tmp1$$Register, base);
-    __ add($tmp1$$Register, $tmp1$$Register, $add$$constant);
-    __ stxr($tmp2$$Register, $tmp1$$Register, base);
-    __ cbnz_w($tmp2$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else
 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
 // (hi($con$$constant), lo($con$$constant)) becomes
 instruct xaddL_immRot_no_res(memoryex mem, immLlowRot add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
@@ -7802,31 +5742,7 @@ instruct xaddL_immRot_no_res(memoryex mem, immLlowRot add, Universe dummy, iRegL
   %}
   ins_pipe( long_memory_op );
 %}
-#endif
-
-#ifdef AARCH64
-instruct xaddL_reg(memoryex mem, iRegL add, iRegL res, iRegL tmp1, iRegI tmp2) %{
-  match(Set res (GetAndAddL mem add));
-  effect(TEMP tmp1, TEMP tmp2, TEMP res);
-  size(16);
-  format %{ "loop:\n\t"
-            "LDXR     $res, $mem\n\t"
-            "ADD      $tmp1, $res, $add\n\t"
-            "STXR     $tmp2, $tmp1, $mem\n\t"
-            "CBNZ_w   $tmp2, loop" %}
 
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr($res$$Register, base);
-    __ add($tmp1$$Register, $res$$Register, $add$$Register);
-    __ stxr($tmp2$$Register, $tmp1$$Register, base);
-    __ cbnz_w($tmp2$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else
 instruct xaddL_reg(memoryex mem, iRegL add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
   match(Set res (GetAndAddL mem add));
   effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res);
@@ -7835,47 +5751,23 @@ instruct xaddL_reg(memoryex mem, iRegL add, iRegLd res, iRegLd tmp1, iRegI tmp2,
             "LDREXD   $res, $mem\n\t"
             "ADDS     $tmp1.lo, $res.lo, $add.lo\n\t"
             "ADC      $tmp1.hi, $res.hi, $add.hi\n\t"
-            "STREXD   $tmp2, $tmp1, $mem\n\t"
-            "CMP      $tmp2, 0 \n\t"
-            "B.ne     loop \n\t" %}
-
-  ins_encode %{
-    Label loop;
-    __ bind(loop);
-    __ ldrexd($res$$Register, $mem$$Address);
-    __ adds($tmp1$$Register, $res$$Register, $add$$Register);
-    __ adc($tmp1$$Register->successor(), $res$$Register->successor(), $add$$Register->successor());
-    __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
-    __ cmp($tmp2$$Register, 0);
-    __ b(loop, ne);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#endif
-
-#ifdef AARCH64
-instruct xaddL_imm(memoryex mem, aimmL add, iRegL res, iRegL tmp1, iRegI tmp2) %{
-  match(Set res (GetAndAddL mem add));
-  effect(TEMP tmp1, TEMP tmp2, TEMP res);
-  size(16);
-  format %{ "loop:\n\t"
-            "LDXR     $res, $mem\n\t"
-            "ADD      $tmp1, $res, $add\n\t"
-            "STXR     $tmp2, $tmp1, $mem\n\t"
-            "CBNZ_w   $tmp2, loop" %}
+            "STREXD   $tmp2, $tmp1, $mem\n\t"
+            "CMP      $tmp2, 0 \n\t"
+            "B.ne     loop \n\t" %}
 
   ins_encode %{
     Label loop;
-    Register base = reg_to_register_object($mem$$base);
     __ bind(loop);
-    __ ldxr($res$$Register, base);
-    __ add($tmp1$$Register, $res$$Register, $add$$constant);
-    __ stxr($tmp2$$Register, $tmp1$$Register, base);
-    __ cbnz_w($tmp2$$Register, loop);
+    __ ldrexd($res$$Register, $mem$$Address);
+    __ adds($tmp1$$Register, $res$$Register, $add$$Register);
+    __ adc($tmp1$$Register->successor(), $res$$Register->successor(), $add$$Register->successor());
+    __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address);
+    __ cmp($tmp2$$Register, 0);
+    __ b(loop, ne);
   %}
   ins_pipe( long_memory_op );
 %}
-#else
+
 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
 // (hi($con$$constant), lo($con$$constant)) becomes
 instruct xaddL_immRot(memoryex mem, immLlowRot add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{
@@ -7902,52 +5794,7 @@ instruct xaddL_immRot(memoryex mem, immLlowRot add, iRegLd res, iRegLd tmp1, iRe
   %}
   ins_pipe( long_memory_op );
 %}
-#endif
-
-#ifdef AARCH64
-instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp) %{
-  match(Set res (GetAndSetI mem newval));
-  effect(TEMP tmp, TEMP res);
-  size(12);
-  format %{ "loop:\n\t"
-            "LDXR_w   $res, $mem\n\t"
-            "STXR_w   $tmp, $newval, $mem\n\t"
-            "CBNZ_w   $tmp, loop" %}
-
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr_w($res$$Register, base);
-    __ stxr_w($tmp$$Register, $newval$$Register, base);
-    __ cbnz_w($tmp$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-
-#ifdef XXX
-// Disabled until 8051805 is fixed.
-instruct xchgN(memoryex mem, iRegN newval, iRegN res, iRegN tmp) %{
-  match(Set res (GetAndSetN mem newval));
-  effect(TEMP tmp, TEMP res);
-  size(12);
-  format %{ "loop:\n\t"
-            "LDXR_w   $res, $mem\n\t"
-            "STXR_w   $tmp, $newval, $mem\n\t"
-            "CBNZ_w   $tmp, loop" %}
 
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr_w($res$$Register, base);
-    __ stxr_w($tmp$$Register, $newval$$Register, base);
-    __ cbnz_w($tmp$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#endif
-#else
 instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{
   match(Set res (GetAndSetI mem newval));
   effect(KILL ccr, TEMP tmp, TEMP res);
@@ -7968,29 +5815,7 @@ instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %
   %}
   ins_pipe( long_memory_op );
 %}
-#endif
-
-#ifdef AARCH64
-instruct xchgL(memoryex mem, iRegL newval, iRegL res, iRegI tmp) %{
-  match(Set res (GetAndSetL mem newval));
-  effect(TEMP tmp, TEMP res);
-  size(12);
-  format %{ "loop:\n\t"
-            "LDXR     $res, $mem\n\t"
-            "STXR     $tmp, $newval, $mem\n\t"
-            "CBNZ_w   $tmp, loop" %}
 
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldxr($res$$Register, base);
-    __ stxr($tmp$$Register, $newval$$Register, base);
-    __ cbnz_w($tmp$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else
 instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr) %{
   match(Set res (GetAndSetL mem newval));
   effect( KILL ccr, TEMP tmp, TEMP res);
@@ -8011,29 +5836,7 @@ instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr)
   %}
   ins_pipe( long_memory_op );
 %}
-#endif // !AARCH64
-
-#ifdef AARCH64
-instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp) %{
-  match(Set res (GetAndSetP mem newval));
-  effect(TEMP tmp, TEMP res);
-  size(12);
-  format %{ "loop:\n\t"
-            "LDREX    $res, $mem\n\t"
-            "STREX    $tmp, $newval, $mem\n\t"
-            "CBNZ_w   $tmp, loop" %}
 
-  ins_encode %{
-    Label loop;
-    Register base = reg_to_register_object($mem$$base);
-    __ bind(loop);
-    __ ldrex($res$$Register, base);
-    __ strex($tmp$$Register, $newval$$Register, base);
-    __ cbnz_w($tmp$$Register, loop);
-  %}
-  ins_pipe( long_memory_op );
-%}
-#else
 instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{
   match(Set res (GetAndSetP mem newval));
   effect(KILL ccr, TEMP tmp, TEMP res);
@@ -8054,7 +5857,6 @@ instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %
   %}
   ins_pipe( long_memory_op );
 %}
-#endif // !AARCH64
 
 //---------------------
 // Subtraction Instructions
@@ -8070,7 +5872,6 @@ instruct subI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct subshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (SubI src1 (LShiftI src2 src3)));
 
@@ -8081,7 +5882,6 @@ instruct subshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct subshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (SubI src1 (LShiftI src2 src3)));
@@ -8094,7 +5894,6 @@ instruct subshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct subsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (SubI src1 (RShiftI src2 src3)));
 
@@ -8105,7 +5904,6 @@ instruct subsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct subsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (SubI src1 (RShiftI src2 src3)));
@@ -8118,7 +5916,6 @@ instruct subsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct subshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (SubI src1 (URShiftI src2 src3)));
 
@@ -8129,7 +5926,6 @@ instruct subshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct subshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (SubI src1 (URShiftI src2 src3)));
@@ -8142,7 +5938,6 @@ instruct subshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct rsbshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (SubI (LShiftI src1 src2) src3));
 
@@ -8208,7 +6003,6 @@ instruct rsbshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 // Immediate Subtraction
 instruct subI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{
@@ -8233,7 +6027,6 @@ instruct subI_reg_immRotneg(iRegI dst, iRegI src1, aimmIneg src2) %{
   ins_pipe(ialu_reg_imm);
 %}
 
-#ifndef AARCH64
 instruct subI_immRot_reg(iRegI dst, immIRot src1, iRegI src2) %{
   match(Set dst (SubI src1 src2));
 
@@ -8244,21 +6037,8 @@ instruct subI_immRot_reg(iRegI dst, immIRot src1, iRegI src2) %{
   %}
   ins_pipe(ialu_zero_reg);
 %}
-#endif
 
 // Register Subtraction
-#ifdef AARCH64
-instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
-  match(Set dst (SubL src1 src2));
-
-  size(4);
-  format %{ "SUB    $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ sub($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-#else
 instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg icc ) %{
   match(Set dst (SubL src1 src2));
   effect (KILL icc);
@@ -8272,36 +6052,9 @@ instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg icc ) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
-
-#ifdef AARCH64
-// Immediate Subtraction
-instruct subL_reg_aimm(iRegL dst, iRegL src1, aimmL src2) %{
-  match(Set dst (SubL src1 src2));
-
-  size(4);
-  format %{ "SUB    $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ sub($dst$$Register, $src1$$Register, $src2$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
-%}
-
-instruct subL_reg_immLneg(iRegL dst, iRegL src1, aimmLneg src2) %{
-  match(Set dst (AddL src1 src2));
 
-  size(4);
-  format %{ "SUB    $dst,$src1,-($src2)\t! long" %}
-  ins_encode %{
-    __ sub($dst$$Register, $src1$$Register, -$src2$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
-%}
-#else
 // TODO
-#endif
 
-#ifndef AARCH64
 // Immediate Subtraction
 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
 // (hi($con$$constant), lo($con$$constant)) becomes
@@ -8333,7 +6086,6 @@ instruct negL_reg_reg(iRegL dst, immL0 zero, iRegL src2, flagsReg icc) %{
   %}
   ins_pipe(ialu_zero_reg);
 %}
-#endif // !AARCH64
 
 // Multiplication Instructions
 // Integer Multiplication
@@ -8349,17 +6101,6 @@ instruct mulI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
   ins_pipe(imul_reg_reg);
 %}
 
-#ifdef AARCH64
-instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
-  match(Set dst (MulL src1 src2));
-  size(4);
-  format %{ "MUL  $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ mul($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-  ins_pipe(imul_reg_reg);
-%}
-#else
 instruct mulL_lo1_hi2(iRegL dst, iRegL src1, iRegL src2) %{
   effect(DEF dst, USE src1, USE src2);
   size(4);
@@ -8401,22 +6142,9 @@ instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
     mulL_lo1_lo2(dst, src1, src2);
   %}
 %}
-#endif // !AARCH64
 
 // Integer Division
 // Register Division
-#ifdef AARCH64
-instruct divI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
-  match(Set dst (DivI src1 src2));
-
-  size(4);
-  format %{ "SDIV    $dst,$src1,$src2\t! 32-bit" %}
-  ins_encode %{
-    __ sdiv_w($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg); // FIXME
-%}
-#else
 instruct divI_reg_reg(R1RegI dst, R0RegI src1, R2RegI src2, LRRegP lr, flagsReg ccr) %{
   match(Set dst (DivI src1 src2));
   effect( KILL ccr, KILL src1, KILL src2, KILL lr);
@@ -8428,21 +6156,8 @@ instruct divI_reg_reg(R1RegI dst, R0RegI src1, R2RegI src2, LRRegP lr, flagsReg
   %}
   ins_pipe(sdiv_reg_reg);
 %}
-#endif
 
 // Register Long Division
-#ifdef AARCH64
-instruct divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
-  match(Set dst (DivL src1 src2));
-
-  size(4);
-  format %{ "SDIV    $dst,$src1,$src2" %}
-  ins_encode %{
-    __ sdiv($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg); // FIXME
-%}
-#else
 instruct divL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
   match(Set dst (DivL src1 src2));
   effect(CALL);
@@ -8454,38 +6169,9 @@ instruct divL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
   %}
   ins_pipe(divL_reg_reg);
 %}
-#endif
 
 // Integer Remainder
 // Register Remainder
-#ifdef AARCH64
-#ifdef TODO
-instruct msubI_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
-  match(Set dst (SubI src1 (MulI src2 src3)));
-
-  size(4);
-  format %{ "MSUB    $dst,$src2,$src3,$src1\t! 32-bit\n\t" %}
-  ins_encode %{
-    __ msub_w($dst$$Register, $src2$$Register, $src3$$Register, $src1$$Register);
-  %}
-  ins_pipe(ialu_reg_reg); // FIXME
-%}
-#endif
-
-instruct modI_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI temp) %{
-  match(Set dst (ModI src1 src2));
-  effect(TEMP temp);
-
-  size(8);
-  format %{ "SDIV    $temp,$src1,$src2\t! 32-bit\n\t"
-            "MSUB    $dst,$src2,$temp,$src1\t! 32-bit\n\t" %}
-  ins_encode %{
-    __ sdiv_w($temp$$Register, $src1$$Register, $src2$$Register);
-    __ msub_w($dst$$Register, $src2$$Register, $temp$$Register, $src1$$Register);
-  %}
-  ins_pipe(ialu_reg_reg); // FIXME
-%}
-#else
 instruct modI_reg_reg(R0RegI dst, R0RegI src1, R2RegI src2, R1RegI temp, LRRegP lr, flagsReg ccr ) %{
   match(Set dst (ModI src1 src2));
   effect( KILL ccr, KILL temp, KILL src2, KILL lr);
@@ -8496,24 +6182,8 @@ instruct modI_reg_reg(R0RegI dst, R0RegI src1, R2RegI src2, R1RegI temp, LRRegP
   %}
   ins_pipe(sdiv_reg_reg);
 %}
-#endif
 
 // Register Long Remainder
-#ifdef AARCH64
-instruct modL_reg_reg(iRegL dst, iRegL src1, iRegL src2, iRegL temp) %{
-  match(Set dst (ModL src1 src2));
-  effect(TEMP temp);
-
-  size(8);
-  format %{ "SDIV    $temp,$src1,$src2\n\t"
-            "MSUB    $dst,$src2,$temp,$src1" %}
-  ins_encode %{
-    __ sdiv($temp$$Register, $src1$$Register, $src2$$Register);
-    __ msub($dst$$Register, $src2$$Register, $temp$$Register, $src1$$Register);
-  %}
-  ins_pipe(ialu_reg_reg); // FIXME
-%}
-#else
 instruct modL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
   match(Set dst (ModL src1 src2));
   effect(CALL);
@@ -8525,7 +6195,6 @@ instruct modL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{
   %}
   ins_pipe(divL_reg_reg);
 %}
-#endif
 
 // Integer Shift Instructions
 
@@ -8534,17 +6203,10 @@ instruct shlI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
   match(Set dst (LShiftI src1 src2));
 
   size(4);
-#ifdef AARCH64
-  format %{ "LSLV   $dst,$src1,$src2\t! int" %}
-  ins_encode %{
-    __ lslv_w($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-#else
   format %{ "LSL  $dst,$src1,$src2 \n\t" %}
   ins_encode %{
     __ mov($dst$$Register, AsmOperand($src1$$Register, lsl, $src2$$Register));
   %}
-#endif
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -8553,21 +6215,13 @@ instruct shlI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
   match(Set dst (LShiftI src1 src2));
 
   size(4);
-#ifdef AARCH64
-  format %{ "LSL_w  $dst,$src1,$src2\t! int" %}
-  ins_encode %{
-    __ _lsl($dst$$Register, $src1$$Register, $src2$$constant);
-  %}
-#else
   format %{ "LSL    $dst,$src1,$src2\t! int" %}
   ins_encode %{
     __ logical_shift_left($dst$$Register, $src1$$Register, $src2$$constant);
   %}
-#endif
   ins_pipe(ialu_reg_imm);
 %}
 
-#ifndef AARCH64
 instruct shlL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{
   effect(USE_DEF dst, USE src1, USE src2);
   size(4);
@@ -8605,40 +6259,18 @@ instruct shlL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif // !AARCH64
 
 instruct shlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
   match(Set dst (LShiftL src1 src2));
 
-#ifdef AARCH64
-  size(4);
-  format %{ "LSLV  $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ lslv($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-#else
   expand %{
     flagsReg ccr;
     shlL_reg_reg_overlap(dst, src1, src2, ccr);
     shlL_reg_reg_merge_hi(dst, src1, src2);
     shlL_reg_reg_merge_lo(dst, src1, src2);
   %}
-#endif
 %}
 
-#ifdef AARCH64
-instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{
-  match(Set dst (LShiftL src1 src2));
-
-  size(4);
-  format %{ "LSL    $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ logical_shift_left($dst$$Register, $src1$$Register, $src2$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
-%}
-#else
 // Register Shift Left Immediate
 instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
   match(Set dst (LShiftL src1 src2));
@@ -8673,23 +6305,15 @@ instruct shlL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
   %}
   ins_pipe(ialu_reg_imm);
 %}
-#endif // !AARCH64
 
 // Register Arithmetic Shift Right
 instruct sarI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
   match(Set dst (RShiftI src1 src2));
   size(4);
-#ifdef AARCH64
-  format %{ "ASRV   $dst,$src1,$src2\t! int" %}
-  ins_encode %{
-    __ asrv_w($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-#else
   format %{ "ASR    $dst,$src1,$src2\t! int" %}
   ins_encode %{
     __ mov($dst$$Register, AsmOperand($src1$$Register, asr, $src2$$Register));
   %}
-#endif
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -8698,21 +6322,13 @@ instruct sarI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
   match(Set dst (RShiftI src1 src2));
 
   size(4);
-#ifdef AARCH64
-  format %{ "ASR_w  $dst,$src1,$src2" %}
-  ins_encode %{
-    __ _asr_w($dst$$Register, $src1$$Register, $src2$$constant);
-  %}
-#else
   format %{ "ASR    $dst,$src1,$src2" %}
   ins_encode %{
     __ mov($dst$$Register, AsmOperand($src1$$Register, asr, $src2$$constant));
   %}
-#endif
   ins_pipe(ialu_reg_imm);
 %}
 
-#ifndef AARCH64
 // Register Shift Right Arithmetic Long
 instruct sarL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
   effect(USE_DEF dst, USE src1, USE src2);
@@ -8751,41 +6367,19 @@ instruct sarL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif // !AARCH64
 
 instruct sarL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
   match(Set dst (RShiftL src1 src2));
 
-#ifdef AARCH64
-  size(4);
-  format %{ "ASRV  $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ asrv($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-#else
   expand %{
     flagsReg ccr;
     sarL_reg_reg_overlap(dst, src1, src2, ccr);
     sarL_reg_reg_merge_lo(dst, src1, src2);
     sarL_reg_reg_merge_hi(dst, src1, src2);
   %}
-#endif
 %}
 
 // Register Shift Left Immediate
-#ifdef AARCH64
-instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{
-  match(Set dst (RShiftL src1 src2));
-
-  size(4);
-  format %{ "ASR    $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ _asr($dst$$Register, $src1$$Register, $src2$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
-%}
-#else
 instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
   match(Set dst (RShiftL src1 src2));
 
@@ -8819,23 +6413,15 @@ instruct sarL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
   %}
   ins_pipe(ialu_reg_imm);
 %}
-#endif
 
 // Register Shift Right
 instruct shrI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
   match(Set dst (URShiftI src1 src2));
   size(4);
-#ifdef AARCH64
-  format %{ "LSRV   $dst,$src1,$src2\t! int" %}
-  ins_encode %{
-    __ lsrv_w($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-#else
   format %{ "LSR    $dst,$src1,$src2\t! int" %}
   ins_encode %{
     __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register));
   %}
-#endif
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -8844,21 +6430,13 @@ instruct shrI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
   match(Set dst (URShiftI src1 src2));
 
   size(4);
-#ifdef AARCH64
-  format %{ "LSR_w  $dst,$src1,$src2" %}
-  ins_encode %{
-    __ _lsr_w($dst$$Register, $src1$$Register, $src2$$constant);
-  %}
-#else
   format %{ "LSR    $dst,$src1,$src2" %}
   ins_encode %{
     __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant));
   %}
-#endif
   ins_pipe(ialu_reg_imm);
 %}
 
-#ifndef AARCH64
 // Register Shift Right
 instruct shrL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{
   effect(USE_DEF dst, USE src1, USE src2);
@@ -8897,41 +6475,19 @@ instruct shrL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif // !AARCH64
 
 instruct shrL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
   match(Set dst (URShiftL src1 src2));
 
-#ifdef AARCH64
-  size(4);
-  format %{ "LSRV  $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ lsrv($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-#else
   expand %{
     flagsReg ccr;
     shrL_reg_reg_overlap(dst, src1, src2, ccr);
     shrL_reg_reg_merge_lo(dst, src1, src2);
     shrL_reg_reg_merge_hi(dst, src1, src2);
   %}
-#endif
 %}
 
 // Register Shift Right Immediate
-#ifdef AARCH64
-instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{
-  match(Set dst (URShiftL src1 src2));
-
-  size(4);
-  format %{ "LSR    $dst,$src1,$src2" %}
-  ins_encode %{
-    __ _lsr($dst$$Register, $src1$$Register, $src2$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
-%}
-#else
 instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{
   match(Set dst (URShiftL src1 src2));
 
@@ -8966,7 +6522,6 @@ instruct shrL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{
   %}
   ins_pipe(ialu_reg_imm);
 %}
-#endif // !AARCH64
 
 
 instruct shrP_reg_imm5(iRegX dst, iRegP src1, immU5 src2) %{
@@ -9164,7 +6719,6 @@ instruct andI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct andshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (AndI src1 (LShiftI src2 src3)));
 
@@ -9175,7 +6729,6 @@ instruct andshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct andshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (AndI src1 (LShiftI src2 src3)));
@@ -9188,7 +6741,6 @@ instruct andshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct andsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (AndI src1 (RShiftI src2 src3)));
 
@@ -9199,7 +6751,6 @@ instruct andsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct andsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (AndI src1 (RShiftI src2 src3)));
@@ -9212,7 +6763,6 @@ instruct andsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct andshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (AndI src1 (URShiftI src2 src3)));
 
@@ -9223,7 +6773,6 @@ instruct andshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct andshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (AndI src1 (URShiftI src2 src3)));
@@ -9248,7 +6797,6 @@ instruct andI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{
   ins_pipe(ialu_reg_imm);
 %}
 
-#ifndef AARCH64
 instruct andI_reg_limmn(iRegI dst, iRegI src1, limmIn src2) %{
   match(Set dst (AndI src1 src2));
 
@@ -9259,43 +6807,21 @@ instruct andI_reg_limmn(iRegI dst, iRegI src1, limmIn src2) %{
   %}
   ins_pipe(ialu_reg_imm);
 %}
-#endif
 
 // Register And Long
 instruct andL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
   match(Set dst (AndL src1 src2));
 
   ins_cost(DEFAULT_COST);
-#ifdef AARCH64
-  size(4);
-  format %{ "AND    $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ andr($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-#else
   size(8);
   format %{ "AND    $dst,$src1,$src2\t! long" %}
   ins_encode %{
     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
     __ andr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
   %}
-#endif
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifdef AARCH64
-// Immediate And
-instruct andL_reg_limm(iRegL dst, iRegL src1, limmL src2) %{
-  match(Set dst (AndL src1 src2));
-
-  size(4);
-  format %{ "AND    $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ andr($dst$$Register, $src1$$Register, (uintx)$src2$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
-%}
-#else
 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
 // (hi($con$$constant), lo($con$$constant)) becomes
 instruct andL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
@@ -9309,7 +6835,6 @@ instruct andL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
   %}
   ins_pipe(ialu_reg_imm);
 %}
-#endif
 
 // Or Instructions
 // Register Or
@@ -9324,7 +6849,6 @@ instruct orI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct orshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (OrI src1 (LShiftI src2 src3)));
 
@@ -9335,7 +6859,6 @@ instruct orshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct orshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (OrI src1 (LShiftI src2 src3)));
@@ -9348,7 +6871,6 @@ instruct orshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct orsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (OrI src1 (RShiftI src2 src3)));
 
@@ -9359,7 +6881,6 @@ instruct orsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct orsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (OrI src1 (RShiftI src2 src3)));
@@ -9372,7 +6893,6 @@ instruct orsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct orshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (OrI src1 (URShiftI src2 src3)));
 
@@ -9383,7 +6903,6 @@ instruct orshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct orshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (OrI src1 (URShiftI src2 src3)));
@@ -9414,13 +6933,6 @@ instruct orL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
   match(Set dst (OrL src1 src2));
 
   ins_cost(DEFAULT_COST);
-#ifdef AARCH64
-  size(4);
-  format %{ "OR     $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ orr($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-#else
   size(8);
   format %{ "OR     $dst.lo,$src1.lo,$src2.lo\t! long\n\t"
             "OR     $dst.hi,$src1.hi,$src2.hi" %}
@@ -9428,22 +6940,9 @@ instruct orL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
     __ orr($dst$$Register, $src1$$Register, $src2$$Register);
     __ orr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
   %}
-#endif
-  ins_pipe(ialu_reg_reg);
-%}
-
-#ifdef AARCH64
-instruct orL_reg_limm(iRegL dst, iRegL src1, limmL src2) %{
-  match(Set dst (OrL src1 src2));
-
-  size(4);
-  format %{ "ORR    $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ orr($dst$$Register, $src1$$Register, (uintx)$src2$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
+  ins_pipe(ialu_reg_reg);
 %}
-#else
+
 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
 // (hi($con$$constant), lo($con$$constant)) becomes
 instruct orL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
@@ -9458,7 +6957,6 @@ instruct orL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
   %}
   ins_pipe(ialu_reg_imm);
 %}
-#endif
 
 #ifdef TODO
 // Use SPRegP to match Rthread (TLS register) without spilling.
@@ -9488,7 +6986,6 @@ instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct xorshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (XorI src1 (LShiftI src2 src3)));
 
@@ -9499,7 +6996,6 @@ instruct xorshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct xorshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (XorI src1 (LShiftI src2 src3)));
@@ -9512,7 +7008,6 @@ instruct xorshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct xorsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (XorI src1 (RShiftI src2 src3)));
 
@@ -9523,7 +7018,6 @@ instruct xorsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct xorsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (XorI src1 (RShiftI src2 src3)));
@@ -9536,7 +7030,6 @@ instruct xorsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifndef AARCH64
 instruct xorshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   match(Set dst (XorI src1 (URShiftI src2 src3)));
 
@@ -9547,7 +7040,6 @@ instruct xorshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{
   %}
   ins_pipe(ialu_reg_reg);
 %}
-#endif
 
 instruct xorshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{
   match(Set dst (XorI src1 (URShiftI src2 src3)));
@@ -9576,13 +7068,6 @@ instruct xorI_reg_imm(iRegI dst, iRegI src1, limmI src2) %{
 instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
   match(Set dst (XorL src1 src2));
   ins_cost(DEFAULT_COST);
-#ifdef AARCH64
-  size(4);
-  format %{ "XOR     $dst,$src1,$src2\t! long" %}
-  ins_encode %{
-    __ eor($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-#else
   size(8);
   format %{ "XOR     $dst.hi,$src1.hi,$src2.hi\t! long\n\t"
             "XOR     $dst.lo,$src1.lo,$src2.lo\t! long" %}
@@ -9590,22 +7075,9 @@ instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
     __ eor($dst$$Register, $src1$$Register, $src2$$Register);
     __ eor($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor());
   %}
-#endif
   ins_pipe(ialu_reg_reg);
 %}
 
-#ifdef AARCH64
-instruct xorL_reg_limmL(iRegL dst, iRegL src1, limmL con) %{
-  match(Set dst (XorL src1 con));
-  ins_cost(DEFAULT_COST);
-  size(4);
-  format %{ "EOR     $dst,$src1,$con\t! long" %}
-  ins_encode %{
-    __ eor($dst$$Register, $src1$$Register, (uintx)$con$$constant);
-  %}
-  ins_pipe(ialu_reg_imm);
-%}
-#else
 // TODO: try immLRot2 instead, (0, $con$$constant) becomes
 // (hi($con$$constant), lo($con$$constant)) becomes
 instruct xorL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
@@ -9620,22 +7092,11 @@ instruct xorL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
   %}
   ins_pipe(ialu_reg_imm);
 %}
-#endif // AARCH64
 
 //----------Convert to Boolean-------------------------------------------------
 instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{
   match(Set dst (Conv2B src));
   effect(KILL ccr);
-#ifdef AARCH64
-  size(8);
-  ins_cost(DEFAULT_COST*2);
-  format %{ "cmp_32 $src,ZR\n\t"
-            "cset_w $dst, ne" %}
-  ins_encode %{
-    __ cmp_32($src$$Register, ZR);
-    __ cset_w($dst$$Register, ne);
-  %}
-#else
   size(12);
   ins_cost(DEFAULT_COST*2);
   format %{ "TST    $src,$src \n\t"
@@ -9646,23 +7107,12 @@ instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{
     __ mov($dst$$Register, 0);
     __ mov($dst$$Register, 1, ne);
   %}
-#endif
   ins_pipe(ialu_reg_ialu);
 %}
 
 instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{
   match(Set dst (Conv2B src));
   effect(KILL ccr);
-#ifdef AARCH64
-  size(8);
-  ins_cost(DEFAULT_COST*2);
-  format %{ "CMP    $src,ZR\n\t"
-            "cset   $dst, ne" %}
-  ins_encode %{
-    __ cmp($src$$Register, ZR);
-    __ cset($dst$$Register, ne);
-  %}
-#else
   size(12);
   ins_cost(DEFAULT_COST*2);
   format %{ "TST    $src,$src \n\t"
@@ -9673,23 +7123,12 @@ instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{
     __ mov($dst$$Register, 0);
     __ mov($dst$$Register, 1, ne);
   %}
-#endif
   ins_pipe(ialu_reg_ialu);
 %}
 
 instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{
   match(Set dst (CmpLTMask p q));
   effect( KILL ccr );
-#ifdef AARCH64
-  size(8);
-  ins_cost(DEFAULT_COST*2);
-  format %{ "CMP_w   $p,$q\n\t"
-            "CSETM_w $dst, lt" %}
-  ins_encode %{
-    __ cmp_w($p$$Register, $q$$Register);
-    __ csetm_w($dst$$Register, lt);
-  %}
-#else
   ins_cost(DEFAULT_COST*3);
   format %{ "CMP    $p,$q\n\t"
             "MOV    $dst, #0\n\t"
@@ -9699,23 +7138,12 @@ instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{
     __ mov($dst$$Register, 0);
     __ mvn($dst$$Register, 0, lt);
   %}
-#endif
   ins_pipe(ialu_reg_reg_ialu);
 %}
 
 instruct cmpLTMask_reg_imm( iRegI dst, iRegI p, aimmI q, flagsReg ccr ) %{
   match(Set dst (CmpLTMask p q));
   effect( KILL ccr );
-#ifdef AARCH64
-  size(8);
-  ins_cost(DEFAULT_COST*2);
-  format %{ "CMP_w   $p,$q\n\t"
-            "CSETM_w $dst, lt" %}
-  ins_encode %{
-    __ cmp_w($p$$Register, $q$$constant);
-    __ csetm_w($dst$$Register, lt);
-  %}
-#else
   ins_cost(DEFAULT_COST*3);
   format %{ "CMP    $p,$q\n\t"
             "MOV    $dst, #0\n\t"
@@ -9725,27 +7153,9 @@ instruct cmpLTMask_reg_imm( iRegI dst, iRegI p, aimmI q, flagsReg ccr ) %{
     __ mov($dst$$Register, 0);
     __ mvn($dst$$Register, 0, lt);
   %}
-#endif
   ins_pipe(ialu_reg_reg_ialu);
 %}
 
-#ifdef AARCH64
-instruct cadd_cmpLTMask3( iRegI dst, iRegI p, iRegI q, iRegI y, iRegI x, flagsReg ccr ) %{
-  match(Set dst (AddI (AndI (CmpLTMask p q) y) x));
-  effect( TEMP dst, KILL ccr );
-  size(12);
-  ins_cost(DEFAULT_COST*3);
-  format %{ "CMP_w  $p,$q\n\t"
-            "ADD_w  $dst,$y,$x\n\t"
-            "CSEL_w $dst,$dst,$x,lt" %}
-  ins_encode %{
-    __ cmp_w($p$$Register, $q$$Register);
-    __ add_w($dst$$Register, $y$$Register, $x$$Register);
-    __ csel_w($dst$$Register, $dst$$Register, $x$$Register, lt);
-  %}
-  ins_pipe( cadd_cmpltmask );
-%}
-#else
 instruct cadd_cmpLTMask3( iRegI p, iRegI q, iRegI y, iRegI z, flagsReg ccr ) %{
   match(Set z (AddI (AndI (CmpLTMask p q) y) z));
   effect( KILL ccr );
@@ -9758,25 +7168,7 @@ instruct cadd_cmpLTMask3( iRegI p, iRegI q, iRegI y, iRegI z, flagsReg ccr ) %{
   %}
   ins_pipe( cadd_cmpltmask );
 %}
-#endif
 
-#ifdef AARCH64
-instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI x, flagsReg ccr ) %{
-  match(Set dst (AddI (AndI (CmpLTMask p q) y) x));
-  effect( TEMP dst, KILL ccr );
-  size(12);
-  ins_cost(DEFAULT_COST*3);
-  format %{ "CMP_w  $p,$q\n\t"
-            "ADD_w  $dst,$y,$x\n\t"
-            "CSEL_w $dst,$dst,$x,lt" %}
-  ins_encode %{
-    __ cmp_w($p$$Register, $q$$constant);
-    __ add_w($dst$$Register, $y$$Register, $x$$Register);
-    __ csel_w($dst$$Register, $dst$$Register, $x$$Register, lt);
-  %}
-  ins_pipe( cadd_cmpltmask );
-%}
-#else
 // FIXME: remove unused "dst"
 instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI z, flagsReg ccr ) %{
   match(Set z (AddI (AndI (CmpLTMask p q) y) z));
@@ -9790,25 +7182,7 @@ instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI z, flagsRe
   %}
   ins_pipe( cadd_cmpltmask );
 %}
-#endif // !AARCH64
 
-#ifdef AARCH64
-instruct cadd_cmpLTMask( iRegI dst, iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{
-  match(Set dst (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
-  effect( TEMP dst, KILL ccr );
-  size(12);
-  ins_cost(DEFAULT_COST*3);
-  format %{ "SUBS_w $p,$p,$q\n\t"
-            "ADD_w  $dst,$y,$p\n\t"
-            "CSEL_w $dst,$dst,$p,lt" %}
-  ins_encode %{
-    __ subs_w($p$$Register, $p$$Register, $q$$Register);
-    __ add_w($dst$$Register, $y$$Register, $p$$Register);
-    __ csel_w($dst$$Register, $dst$$Register, $p$$Register, lt);
-  %}
-  ins_pipe( cadd_cmpltmask ); // FIXME
-%}
-#else
 instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{
   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
   effect( KILL ccr );
@@ -9821,7 +7195,6 @@ instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{
   %}
   ins_pipe( cadd_cmpltmask );
 %}
-#endif
 
 //----------Arithmetic Conversion Instructions---------------------------------
 // The conversions operations are all Alpha sorted.  Please keep it that way!
@@ -9839,27 +7212,6 @@ instruct convD2F_reg(regF dst, regD src) %{
 // Convert a double to an int in a float register.
 // If the double is a NAN, stuff a zero in instead.
 
-#ifdef AARCH64
-instruct convD2I_reg_reg(iRegI dst, regD src) %{
-  match(Set dst (ConvD2I src));
-  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
-  format %{ "FCVTZS_wd $dst, $src" %}
-  ins_encode %{
-    __ fcvtzs_wd($dst$$Register, $src$$FloatRegister);
-  %}
-  ins_pipe(fcvtD2I);
-%}
-
-instruct convD2L_reg_reg(iRegL dst, regD src) %{
-  match(Set dst (ConvD2L src));
-  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
-  format %{ "FCVTZS_xd $dst, $src" %}
-  ins_encode %{
-    __ fcvtzs_xd($dst$$Register, $src$$FloatRegister);
-  %}
-  ins_pipe(fcvtD2L);
-%}
-#else
 instruct convD2I_reg_reg(iRegI dst, regD src, regF tmp) %{
   match(Set dst (ConvD2I src));
   effect( TEMP tmp );
@@ -9872,12 +7224,10 @@ instruct convD2I_reg_reg(iRegI dst, regD src, regF tmp) %{
   %}
   ins_pipe(fcvtD2I);
 %}
-#endif
 
 // Convert a double to a long in a double register.
 // If the double is a NAN, stuff a zero in instead.
 
-#ifndef AARCH64
 // Double to Long conversion
 instruct convD2L_reg(R0R1RegL dst, regD src) %{
   match(Set dst (ConvD2L src));
@@ -9897,7 +7247,6 @@ instruct convD2L_reg(R0R1RegL dst, regD src) %{
   %}
   ins_pipe(fcvtD2L);
 %}
-#endif
 
 instruct convF2D_reg(regD dst, regF src) %{
   match(Set dst (ConvF2D src));
@@ -9909,29 +7258,6 @@ instruct convF2D_reg(regD dst, regF src) %{
   ins_pipe(fcvtF2D);
 %}
 
-#ifdef AARCH64
-instruct convF2I_reg_reg(iRegI dst, regF src) %{
-  match(Set dst (ConvF2I src));
-  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
-  size(4);
-  format %{ "FCVTZS_ws $dst, $src" %}
-  ins_encode %{
-    __ fcvtzs_ws($dst$$Register, $src$$FloatRegister);
-  %}
-  ins_pipe(fcvtF2I);
-%}
-
-instruct convF2L_reg_reg(iRegL dst, regF src) %{
-  match(Set dst (ConvF2L src));
-  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
-  size(4);
-  format %{ "FCVTZS_xs $dst, $src" %}
-  ins_encode %{
-    __ fcvtzs_xs($dst$$Register, $src$$FloatRegister);
-  %}
-  ins_pipe(fcvtF2L);
-%}
-#else
 instruct convF2I_reg_reg(iRegI dst, regF src, regF tmp) %{
   match(Set dst (ConvF2I src));
   effect( TEMP tmp );
@@ -9965,20 +7291,7 @@ instruct convF2L_reg(R0R1RegL dst, regF src, R0RegI arg1) %{
   %}
   ins_pipe(fcvtF2L);
 %}
-#endif
 
-#ifdef AARCH64
-instruct convI2D_reg_reg(iRegI src, regD dst) %{
-  match(Set dst (ConvI2D src));
-  ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
-  size(4);
-  format %{ "SCVTF_dw $dst,$src" %}
-  ins_encode %{
-      __ scvtf_dw($dst$$FloatRegister, $src$$Register);
-  %}
-  ins_pipe(fcvtI2D);
-%}
-#else
 instruct convI2D_reg_reg(iRegI src, regD_low dst) %{
   match(Set dst (ConvI2D src));
   ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
@@ -9991,18 +7304,10 @@ instruct convI2D_reg_reg(iRegI src, regD_low dst) %{
   %}
   ins_pipe(fcvtI2D);
 %}
-#endif
 
 instruct convI2F_reg_reg( regF dst, iRegI src ) %{
   match(Set dst (ConvI2F src));
   ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME
-#ifdef AARCH64
-  size(4);
-  format %{ "SCVTF_sw $dst,$src" %}
-  ins_encode %{
-      __ scvtf_sw($dst$$FloatRegister, $src$$Register);
-  %}
-#else
   size(8);
   format %{ "FMSR     $dst,$src \n\t"
             "FSITOS   $dst, $dst"%}
@@ -10010,19 +7315,11 @@ instruct convI2F_reg_reg( regF dst, iRegI src ) %{
       __ fmsr($dst$$FloatRegister, $src$$Register);
       __ fsitos($dst$$FloatRegister, $dst$$FloatRegister);
   %}
-#endif
   ins_pipe(fcvtI2F);
 %}
 
 instruct convI2L_reg(iRegL dst, iRegI src) %{
   match(Set dst (ConvI2L src));
-#ifdef AARCH64
-  size(4);
-  format %{ "SXTW   $dst,$src\t! int->long" %}
-  ins_encode %{
-    __ sxtw($dst$$Register, $src$$Register);
-  %}
-#else
   size(8);
   format %{ "MOV    $dst.lo, $src \n\t"
             "ASR    $dst.hi,$src,31\t! int->long" %}
@@ -10030,20 +7327,12 @@ instruct convI2L_reg(iRegL dst, iRegI src) %{
     __ mov($dst$$Register, $src$$Register);
     __ mov($dst$$Register->successor(), AsmOperand($src$$Register, asr, 31));
   %}
-#endif
   ins_pipe(ialu_reg_reg);
 %}
 
 // Zero-extend convert int to long
 instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{
   match(Set dst (AndL (ConvI2L src) mask) );
-#ifdef AARCH64
-  size(4);
-  format %{ "mov_w  $dst,$src\t! zero-extend int to long"  %}
-  ins_encode %{
-    __ mov_w($dst$$Register, $src$$Register);
-  %}
-#else
   size(8);
   format %{ "MOV    $dst.lo,$src.lo\t! zero-extend int to long\n\t"
             "MOV    $dst.hi, 0"%}
@@ -10051,20 +7340,12 @@ instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{
     __ mov($dst$$Register, $src$$Register);
     __ mov($dst$$Register->successor(), 0);
   %}
-#endif
   ins_pipe(ialu_reg_reg);
 %}
 
 // Zero-extend long
 instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{
   match(Set dst (AndL src mask) );
-#ifdef AARCH64
-  size(4);
-  format %{ "mov_w  $dst,$src\t! zero-extend long"  %}
-  ins_encode %{
-    __ mov_w($dst$$Register, $src$$Register);
-  %}
-#else
   size(8);
   format %{ "MOV    $dst.lo,$src.lo\t! zero-extend long\n\t"
             "MOV    $dst.hi, 0"%}
@@ -10072,7 +7353,6 @@ instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{
     __ mov($dst$$Register, $src$$Register);
     __ mov($dst$$Register->successor(), 0);
   %}
-#endif
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -10107,17 +7387,10 @@ instruct MoveD2L_reg_reg(iRegL dst, regD src) %{
   ins_cost(MEMORY_REF_COST); // FIXME
 
   size(4);
-#ifdef AARCH64
-  format %{ "FMOV_xd  $dst,$src\t! MoveD2L" %}
-  ins_encode %{
-    __ fmov_xd($dst$$Register, $src$$FloatRegister);
-  %}
-#else
   format %{ "FMRRD    $dst,$src\t! MoveD2L" %}
   ins_encode %{
     __ fmrrd($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister);
   %}
-#endif
   ins_pipe(iload_mem); // FIXME
 %}
 
@@ -10127,46 +7400,16 @@ instruct MoveL2D_reg_reg(regD dst, iRegL src) %{
   ins_cost(MEMORY_REF_COST); // FIXME
 
   size(4);
-#ifdef AARCH64
-  format %{ "FMOV_dx $dst,$src\t! MoveL2D" %}
-  ins_encode %{
-    __ fmov_dx($dst$$FloatRegister, $src$$Register);
-  %}
-#else
   format %{ "FMDRR   $dst,$src\t! MoveL2D" %}
   ins_encode %{
     __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register->successor());
   %}
-#endif
   ins_pipe(ialu_reg_reg); // FIXME
 %}
 
 //-----------
 // Long to Double conversion
 
-#ifdef AARCH64
-instruct convL2D(regD dst, iRegL src) %{
-  match(Set dst (ConvL2D src));
-  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
-  size(4);
-  format %{ "SCVTF_dx $dst, $src" %}
-  ins_encode %{
-    __ scvtf_dx($dst$$FloatRegister, $src$$Register);
-  %}
-  ins_pipe(fcvtL2D);
-%}
-
-instruct convL2F(regF dst, iRegL src) %{
-  match(Set dst (ConvL2F src));
-  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME
-  size(4);
-  format %{ "SCVTF_sx $dst, $src" %}
-  ins_encode %{
-    __ scvtf_sx($dst$$FloatRegister, $src$$Register);
-  %}
-  ins_pipe(fcvtL2F);
-%}
-#else
 // Magic constant, 0x43300000
 instruct loadConI_x43300000(iRegI dst) %{
   effect(DEF dst);
@@ -10212,7 +7455,6 @@ instruct regDHi_regDLo_to_regD(regD_low dst, regD_low src1, regD_low src2) %{
   ins_pipe(faddD_reg_reg);
 %}
 
-#ifndef AARCH64
 // Convert integer in high half of a double register (in the lower half of
 // the double register file) to double
 instruct convI2D_regDHi_regD(regD dst, regD_low src) %{
@@ -10224,7 +7466,6 @@ instruct convI2D_regDHi_regD(regD dst, regD_low src) %{
   %}
   ins_pipe(fcvtLHi2D);
 %}
-#endif
 
 // Add float double precision
 instruct addD_regD_regD(regD dst, regD src1, regD src2) %{
@@ -10315,26 +7556,17 @@ instruct convL2D_reg_slow_fxtof(regD dst, iRegL src) %{
     addD_regD_regD(dst, tmp3, tmp4);
   %}
 %}
-#endif // !AARCH64
 
 instruct convL2I_reg(iRegI dst, iRegL src) %{
   match(Set dst (ConvL2I src));
   size(4);
-#ifdef AARCH64
-  format %{ "MOV_w  $dst,$src\t! long->int" %}
-  ins_encode %{
-    __ mov_w($dst$$Register, $src$$Register);
-  %}
-#else
   format %{ "MOV    $dst,$src.lo\t! long->int" %}
   ins_encode %{
     __ mov($dst$$Register, $src$$Register);
   %}
-#endif
   ins_pipe(ialu_move_reg_I_to_L);
 %}
 
-#ifndef AARCH64
 // Register Shift Right Immediate
 instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{
   match(Set dst (ConvL2I (RShiftL src cnt)));
@@ -10349,7 +7581,6 @@ instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{
   %}
   ins_pipe(ialu_reg_imm);
 %}
-#endif
 
 
 //----------Control Flow Instructions------------------------------------------
@@ -10428,7 +7659,6 @@ instruct testI_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immI0 zero
   ins_pipe(ialu_cconly_reg_reg_zero);
 %}
 
-#ifndef AARCH64
 instruct testshlI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
   match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero));
   size(4);
@@ -10439,7 +7669,6 @@ instruct testshlI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iReg
   %}
   ins_pipe(ialu_cconly_reg_reg_zero);
 %}
-#endif
 
 instruct testshlI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
   match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero));
@@ -10452,7 +7681,6 @@ instruct testshlI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU
   ins_pipe(ialu_cconly_reg_reg_zero);
 %}
 
-#ifndef AARCH64
 instruct testsarI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
   match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero));
   size(4);
@@ -10463,7 +7691,6 @@ instruct testsarI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iReg
   %}
   ins_pipe(ialu_cconly_reg_reg_zero);
 %}
-#endif
 
 instruct testsarI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
   match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero));
@@ -10476,7 +7703,6 @@ instruct testsarI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU
   ins_pipe(ialu_cconly_reg_reg_zero);
 %}
 
-#ifndef AARCH64
 instruct testshrI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{
   match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero));
   size(4);
@@ -10487,7 +7713,6 @@ instruct testshrI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iReg
   %}
   ins_pipe(ialu_cconly_reg_reg_zero);
 %}
-#endif
 
 instruct testshrI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{
   match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero));
@@ -10511,31 +7736,6 @@ instruct testI_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, limmI op2, immI0 zero
   ins_pipe(ialu_cconly_reg_imm_zero);
 %}
 
-#ifdef AARCH64
-instruct compL_reg_reg(flagsReg xcc, iRegL op1, iRegL op2)
-%{
-  match(Set xcc (CmpL op1 op2));
-  effect( DEF xcc, USE op1, USE op2 );
-
-  size(4);
-  format %{ "CMP     $op1,$op2\t! long" %}
-  ins_encode %{
-    __ cmp($op1$$Register, $op2$$Register);
-  %}
-  ins_pipe(ialu_cconly_reg_reg);
-%}
-
-instruct compUL_iReg(flagsRegU xcc, iRegL op1, iRegL op2) %{
-  match(Set xcc (CmpUL op1 op2));
-
-  size(4);
-  format %{ "CMP     $op1,$op2\t! unsigned long" %}
-  ins_encode %{
-    __ cmp($op1$$Register, $op2$$Register);
-  %}
-  ins_pipe(ialu_cconly_reg_reg);
-%}
-#else
 instruct compL_reg_reg_LTGE(flagsRegL_LTGE xcc, iRegL op1, iRegL op2, iRegL tmp) %{
   match(Set xcc (CmpL op1 op2));
   effect( DEF xcc, USE op1, USE op2, TEMP tmp );
@@ -10563,35 +7763,7 @@ instruct compUL_reg_reg_LTGE(flagsRegUL_LTGE xcc, iRegL op1, iRegL op2, iRegL tm
   %}
   ins_pipe(ialu_cconly_reg_reg);
 %}
-#endif
-
-#ifdef AARCH64
-instruct compL_reg_con(flagsReg xcc, iRegL op1, aimmL con) %{
-  match(Set xcc (CmpL op1 con));
-  effect( DEF xcc, USE op1, USE con );
-
-  size(8);
-  format %{ "CMP     $op1,$con\t\t! long"  %}
-  ins_encode %{
-    __ cmp($op1$$Register, $con$$constant);
-  %}
-
-  ins_pipe(ialu_cconly_reg_imm);
-%}
-
-instruct compUL_reg_con(flagsRegU xcc, iRegL op1, aimmL con) %{
-  match(Set xcc (CmpUL op1 con));
-  effect(DEF xcc, USE op1, USE con);
-
-  size(8);
-  format %{ "CMP     $op1,$con\t\t! unsigned long"  %}
-  ins_encode %{
-    __ cmp($op1$$Register, $con$$constant);
-  %}
 
-  ins_pipe(ialu_cconly_reg_imm);
-%}
-#else
 instruct compL_reg_reg_EQNE(flagsRegL_EQNE xcc, iRegL op1, iRegL op2) %{
   match(Set xcc (CmpL op1 op2));
   effect( DEF xcc, USE op1, USE op2 );
@@ -10749,7 +7921,6 @@ instruct compUL_reg_con_LEGT(flagsRegUL_LEGT xcc, iRegL op1, immLlowRot con, iRe
 
   ins_pipe(ialu_cconly_reg_reg);
 %}
-#endif
 
 /* instruct testL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2, immL0 zero) %{ */
 /*   match(Set xcc (CmpL (AndL op1 op2) zero)); */
@@ -10857,13 +8028,6 @@ instruct cmpF_cc(flagsRegF fcc, flagsReg icc, regF src1, regF src2) %{
   match(Set icc (CmpF src1 src2));
   effect(KILL fcc);
 
-#ifdef AARCH64
-  size(4);
-  format %{ "FCMP_s  $src1,$src2" %}
-  ins_encode %{
-    __ fcmp_s($src1$$FloatRegister, $src2$$FloatRegister);
-  %}
-#else
   size(8);
   format %{ "FCMPs  $src1,$src2\n\t"
             "FMSTAT" %}
@@ -10871,7 +8035,6 @@ instruct cmpF_cc(flagsRegF fcc, flagsReg icc, regF src1, regF src2) %{
     __ fcmps($src1$$FloatRegister, $src2$$FloatRegister);
     __ fmstat();
   %}
-#endif
   ins_pipe(faddF_fcc_reg_reg_zero);
 %}
 
@@ -10879,13 +8042,6 @@ instruct cmpF0_cc(flagsRegF fcc, flagsReg icc, regF src1, immF0 src2) %{
   match(Set icc (CmpF src1 src2));
   effect(KILL fcc);
 
-#ifdef AARCH64
-  size(4);
-  format %{ "FCMP0_s $src1" %}
-  ins_encode %{
-    __ fcmp0_s($src1$$FloatRegister);
-  %}
-#else
   size(8);
   format %{ "FCMPs  $src1,$src2\n\t"
             "FMSTAT" %}
@@ -10893,7 +8049,6 @@ instruct cmpF0_cc(flagsRegF fcc, flagsReg icc, regF src1, immF0 src2) %{
     __ fcmpzs($src1$$FloatRegister);
     __ fmstat();
   %}
-#endif
   ins_pipe(faddF_fcc_reg_reg_zero);
 %}
 
@@ -10901,13 +8056,6 @@ instruct cmpD_cc(flagsRegF fcc, flagsReg icc, regD src1, regD src2) %{
   match(Set icc (CmpD src1 src2));
   effect(KILL fcc);
 
-#ifdef AARCH64
-  size(4);
-  format %{ "FCMP_d $src1,$src2" %}
-  ins_encode %{
-    __ fcmp_d($src1$$FloatRegister, $src2$$FloatRegister);
-  %}
-#else
   size(8);
   format %{ "FCMPd  $src1,$src2 \n\t"
             "FMSTAT" %}
@@ -10915,109 +8063,23 @@ instruct cmpD_cc(flagsRegF fcc, flagsReg icc, regD src1, regD src2) %{
     __ fcmpd($src1$$FloatRegister, $src2$$FloatRegister);
     __ fmstat();
   %}
-#endif
   ins_pipe(faddD_fcc_reg_reg_zero);
 %}
 
 instruct cmpD0_cc(flagsRegF fcc, flagsReg icc, regD src1, immD0 src2) %{
   match(Set icc (CmpD src1 src2));
-  effect(KILL fcc);
-
-#ifdef AARCH64
-  size(8);
-  format %{ "FCMP0_d $src1" %}
-  ins_encode %{
-    __ fcmp0_d($src1$$FloatRegister);
-  %}
-#else
-  size(8);
-  format %{ "FCMPZd  $src1,$src2 \n\t"
-            "FMSTAT" %}
-  ins_encode %{
-    __ fcmpzd($src1$$FloatRegister);
-    __ fmstat();
-  %}
-#endif
-  ins_pipe(faddD_fcc_reg_reg_zero);
-%}
-
-#ifdef AARCH64
-// Compare floating, generate -1,0,1
-instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsReg icc) %{
-  match(Set dst (CmpF3 src1 src2));
-  // effect(KILL fcc); // nobody cares if flagsRegF is killed
-  effect(KILL icc);
-  ins_cost(DEFAULT_COST*3); // FIXME
-  size(12);
-  format %{ "FCMP_s $src1,$src2\n\t"
-            "CSET   $dst, gt\n\t"
-            "CSINV  $dst, $dst, ZR, ge" %}
-  ins_encode %{
-    Register dst = $dst$$Register;
-    __ fcmp_s($src1$$FloatRegister, $src2$$FloatRegister);
-    __ cset(dst, gt);            // 1 if '>', else 0
-    __ csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
-  %}
-  ins_pipe( floating_cmp ); // FIXME
-%}
-
-// Compare floating, generate -1,0,1
-instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsReg icc) %{
-  match(Set dst (CmpD3 src1 src2));
-  // effect(KILL fcc); // nobody cares if flagsRegF is killed
-  effect(KILL icc);
-  ins_cost(DEFAULT_COST*3); // FIXME
-  size(12);
-  format %{ "FCMP_d $src1,$src2\n\t"
-            "CSET   $dst, gt\n\t"
-            "CSINV  $dst, $dst, ZR, ge" %}
-  ins_encode %{
-    Register dst = $dst$$Register;
-    __ fcmp_d($src1$$FloatRegister, $src2$$FloatRegister);
-    __ cset(dst, gt);            // 1 if '>', else 0
-    __ csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
-  %}
-  ins_pipe( floating_cmp ); // FIXME
-%}
-
-// Compare floating, generate -1,0,1
-instruct cmpF0_reg(iRegI dst, regF src1, immF0 src2, flagsReg icc) %{
-  match(Set dst (CmpF3 src1 src2));
-  // effect(KILL fcc); // nobody cares if flagsRegF is killed
-  effect(KILL icc);
-  ins_cost(DEFAULT_COST*3); // FIXME
-  size(12);
-  format %{ "FCMP0_s $src1\n\t"
-            "CSET   $dst, gt\n\t"
-            "CSINV  $dst, $dst, ZR, ge" %}
-  ins_encode %{
-    Register dst = $dst$$Register;
-    __ fcmp0_s($src1$$FloatRegister);
-    __ cset(dst, gt);            // 1 if '>', else 0
-    __ csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
-  %}
-  ins_pipe( floating_cmp ); // FIXME
-%}
+  effect(KILL fcc);
 
-// Compare floating, generate -1,0,1
-instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsReg icc) %{
-  match(Set dst (CmpD3 src1 src2));
-  // effect(KILL fcc); // nobody cares if flagsRegF is killed
-  effect(KILL icc);
-  ins_cost(DEFAULT_COST*3); // FIXME
-  size(12);
-  format %{ "FCMP0_d $src1\n\t"
-            "CSET   $dst, gt\n\t"
-            "CSINV  $dst, $dst, ZR, ge" %}
+  size(8);
+  format %{ "FCMPZd  $src1,$src2 \n\t"
+            "FMSTAT" %}
   ins_encode %{
-    Register dst = $dst$$Register;
-    __ fcmp0_d($src1$$FloatRegister);
-    __ cset(dst, gt);            // 1 if '>', else 0
-    __ csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
+    __ fcmpzd($src1$$FloatRegister);
+    __ fmstat();
   %}
-  ins_pipe( floating_cmp ); // FIXME
+  ins_pipe(faddD_fcc_reg_reg_zero);
 %}
-#else
+
 // Compare floating, generate -1,0,1
 instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsRegF fcc) %{
   match(Set dst (CmpF3 src1 src2));
@@ -11094,7 +8156,6 @@ instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsRegF fcc) %{
   %}
   ins_pipe( floating_cmp );
 %}
-#endif // !AARCH64
 
 //----------Branches---------------------------------------------------------
 // Jump
@@ -11176,61 +8237,6 @@ instruct branchCon_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, label labl) %{
 %}
 #endif
 
-#ifdef AARCH64
-instruct cbzI(cmpOp cmp, iRegI op1, immI0 op2, label labl) %{
-  match(If cmp (CmpI op1 op2));
-  effect(USE labl);
-  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
-            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
-  size(4);
-  ins_cost(BRANCH_COST);
-  format %{ "CB{N}Z $op1, $labl\t! int $cmp" %}
-  ins_encode %{
-    if ($cmp$$cmpcode == eq) {
-      __ cbz_w($op1$$Register, *($labl$$label));
-    } else {
-      __ cbnz_w($op1$$Register, *($labl$$label));
-    }
-  %}
-  ins_pipe(br_cc); // FIXME
-%}
-
-instruct cbzP(cmpOpP cmp, iRegP op1, immP0 op2, label labl) %{
-  match(If cmp (CmpP op1 op2));
-  effect(USE labl);
-  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
-            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
-  size(4);
-  ins_cost(BRANCH_COST);
-  format %{ "CB{N}Z $op1, $labl\t! ptr $cmp" %}
-  ins_encode %{
-    if ($cmp$$cmpcode == eq) {
-      __ cbz($op1$$Register, *($labl$$label));
-    } else {
-      __ cbnz($op1$$Register, *($labl$$label));
-    }
-  %}
-  ins_pipe(br_cc); // FIXME
-%}
-
-instruct cbzL(cmpOpL cmp, iRegL op1, immL0 op2, label labl) %{
-  match(If cmp (CmpL op1 op2));
-  effect(USE labl);
-  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
-            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
-  size(4);
-  ins_cost(BRANCH_COST);
-  format %{ "CB{N}Z $op1, $labl\t! long $cmp" %}
-  ins_encode %{
-    if ($cmp$$cmpcode == eq) {
-      __ cbz($op1$$Register, *($labl$$label));
-    } else {
-      __ cbnz($op1$$Register, *($labl$$label));
-    }
-  %}
-  ins_pipe(br_cc); // FIXME
-%}
-#endif
 
 instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
   match(If cmp icc);
@@ -11258,7 +8264,6 @@ instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
   ins_pipe(br_cc);
 %}
 
-#ifndef AARCH64
 instruct branchConL_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, label labl) %{
   match(If cmp xcc);
   effect(USE labl);
@@ -11342,7 +8347,6 @@ instruct branchConUL_LEGT(cmpOpUL_commute cmp, flagsRegUL_LEGT xcc, label labl)
   %}
   ins_pipe(br_cc);
 %}
-#endif
 
 instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
   match(CountedLoopEnd cmp icc);
@@ -11390,26 +8394,6 @@ instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
 
 // Manifest a CmpL3 result in an integer register.  Very painful.
 // This is the test to avoid.
-#ifdef AARCH64
-instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr) %{
-  match(Set dst (CmpL3 src1 src2));
-  // effect(KILL fcc); // nobody cares if flagsRegF is killed
-  effect(KILL ccr);
-  ins_cost(DEFAULT_COST*3); // FIXME
-  size(12);
-  format %{ "CMP    $src1,$src2\n\t"
-            "CSET   $dst, gt\n\t"
-            "CSINV  $dst, $dst, ZR, ge" %}
-  ins_encode %{
-    Register dst = $dst$$Register;
-    __ cmp($src1$$Register, $src2$$Register);
-    __ cset(dst, gt);            // 1 if '>', else 0
-    __ csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
-  %}
-  ins_pipe( ialu_cconly_reg_reg ); // FIXME
-%}
-// TODO cmpL3_reg_imm
-#else
 instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{
   match(Set dst (CmpL3 src1 src2) );
   effect( KILL ccr );
@@ -11437,9 +8421,7 @@ instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{
   %}
   ins_pipe(cmpL_reg);
 %}
-#endif
 
-#ifndef AARCH64
 // Conditional move
 instruct cmovLL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, iRegL src) %{
   match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src)));
@@ -11527,9 +8509,7 @@ instruct cmovLL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, immL
   %}
   ins_pipe(ialu_imm);
 %}
-#endif // !AARCH64
 
-#ifndef AARCH64
 instruct cmovIL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, iRegI src) %{
   match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
   predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
@@ -11568,9 +8548,7 @@ instruct cmovIL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, iReg
   %}
   ins_pipe(ialu_reg);
 %}
-#endif // !AARCH64
 
-#ifndef AARCH64
 instruct cmovIL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, immI16 src) %{
   match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
   predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
@@ -11756,25 +8734,9 @@ instruct cmovDL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regD dst, regD
   %}
   ins_pipe(int_conditional_float_move);
 %}
-#endif // !AARCH64
 
 // ============================================================================
 // Safepoint Instruction
-#ifdef AARCH64
-instruct safePoint_poll(iRegP poll, flagsReg icc, RtempRegP tmp) %{
-  match(SafePoint poll);
-  // The handler stub kills Rtemp
-  effect(USE poll, KILL tmp, KILL icc);
-
-  size(4);
-  format %{ "LDR   ZR,[$poll]\t! Safepoint: poll for GC" %}
-  ins_encode %{
-    __ relocate(relocInfo::poll_type);
-    __ ldr(ZR, Address($poll$$Register));
-  %}
-  ins_pipe(loadPollP);
-%}
-#else
 // rather than KILL R12, it would be better to use any reg as
 // TEMP. Can't do that at this point because it crashes the compiler
 instruct safePoint_poll(iRegP poll, R12RegI tmp, flagsReg icc) %{
@@ -11789,7 +8751,6 @@ instruct safePoint_poll(iRegP poll, R12RegI tmp, flagsReg icc) %{
   %}
   ins_pipe(loadPollP);
 %}
-#endif
 
 
 // ============================================================================
@@ -11838,13 +8799,8 @@ instruct CallRuntimeDirect(method meth) %{
   effect(USE meth);
   ins_cost(CALL_COST);
   format %{ "CALL,runtime" %}
-#ifdef AARCH64
-  ins_encode( save_last_PC, Java_To_Runtime( meth ),
-              call_epilog );
-#else
   ins_encode( Java_To_Runtime( meth ),
               call_epilog );
-#endif
   ins_pipe(simple_call);
 %}
 
@@ -11970,11 +8926,7 @@ instruct ShouldNotReachHere( )
   format %{ "ShouldNotReachHere" %}
   ins_encode %{
     if (is_reachable()) {
-#ifdef AARCH64
-      __ dpcs1(0xdead);
-#else
       __ udf(0xdead);
-#endif
     }
   %}
   ins_pipe(tail_call);
@@ -12005,50 +8957,22 @@ instruct partialSubtypeCheck( R0RegP index, R1RegP sub, R2RegP super, flagsRegP
 // ============================================================================
 // inlined locking and unlocking
 
-#ifdef AARCH64
-instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch, iRegP scratch3 )
-#else
 instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch )
-#endif
 %{
   match(Set pcc (FastLock object box));
 
-#ifdef AARCH64
-  effect(TEMP scratch, TEMP scratch2, TEMP scratch3);
-#else
   predicate(!(UseBiasedLocking && !UseOptoBiasInlining));
   effect(TEMP scratch, TEMP scratch2);
-#endif
   ins_cost(DEFAULT_COST*3);
 
-#ifdef AARCH64
-  format %{ "FASTLOCK  $object, $box; KILL $scratch, $scratch2, $scratch3" %}
-  ins_encode %{
-    __ fast_lock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register, $scratch3$$Register);
-  %}
-#else
   format %{ "FASTLOCK  $object, $box; KILL $scratch, $scratch2" %}
   ins_encode %{
     __ fast_lock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register);
   %}
-#endif
   ins_pipe(long_memory_op);
 %}
 
 
-#ifdef AARCH64
-instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch, iRegP scratch3 ) %{
-  match(Set pcc (FastUnlock object box));
-  effect(TEMP scratch, TEMP scratch2, TEMP scratch3);
-  ins_cost(100);
-
-  format %{ "FASTUNLOCK  $object, $box; KILL $scratch, $scratch2, $scratch3" %}
-  ins_encode %{
-    __ fast_unlock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register, $scratch3$$Register);
-  %}
-  ins_pipe(long_memory_op);
-%}
-#else
 instruct cmpFastLock_noBiasInline(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2,
                                   iRegP scratch, iRegP scratch3) %{
   match(Set pcc (FastLock object box));
@@ -12075,48 +8999,7 @@ instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, i
   %}
   ins_pipe(long_memory_op);
 %}
-#endif
 
-#ifdef AARCH64
-// TODO: add version that takes immI cnt?
-instruct clear_array(iRegX cnt, iRegP base, iRegP ptr, iRegX temp, Universe dummy, flagsReg cpsr) %{
-  match(Set dummy (ClearArray cnt base));
-  effect(TEMP temp, TEMP ptr, KILL cpsr);
-  ins_cost(300);
-  format %{
-      "        MOV    $temp,$cnt\n"
-      "        ADD    $ptr,$base,$cnt\n"
-      "        SUBS   $temp,$temp,16\t! Count down dword pair in bytes\n"
-      "        B.lt   done16\n"
-      "loop:   STP    ZR,ZR,[$ptr,-16]!\n"
-      "        SUBS   $temp,$temp,16\t! Count down dword pair in bytes\n"
-      "        B.ge   loop\t! Clearing loop\n"
-      "done16: ADDS   $temp,$temp,8\t! Room for 1 more long?\n"
-      "        B.lt   done\n"
-      "        STR    ZR,[$base+$temp]\n"
-      "done:"
-  %}
-  ins_encode %{
-    // TODO: preload?
-    __ mov($temp$$Register, $cnt$$Register);
-    __ add($ptr$$Register, $base$$Register, $cnt$$Register);
-    Label loop, done, done16;
-    __ subs($temp$$Register, $temp$$Register, 16);
-    __ b(done16, lt);
-    __ bind(loop);
-    __ stp(ZR, ZR, Address($ptr$$Register, -16, pre_indexed));
-    __ subs($temp$$Register, $temp$$Register, 16);
-    __ b(loop, ge);
-    __ bind(done16);
-    __ adds($temp$$Register, $temp$$Register, 8);
-    __ b(done, lt);
-    // $temp should be 0 here
-    __ str(ZR, Address($base$$Register, $temp$$Register));
-    __ bind(done);
-  %}
-  ins_pipe(long_memory_op);
-%}
-#else
 // Count and Base registers are fixed because the allocator cannot
 // kill unknown registers.  The encodings are generic.
 instruct clear_array(iRegX cnt, iRegP base, iRegI temp, iRegX zero, Universe dummy, flagsReg cpsr) %{
@@ -12139,7 +9022,6 @@ instruct clear_array(iRegX cnt, iRegP base, iRegI temp, iRegX zero, Universe dum
   %}
   ins_pipe(long_memory_op);
 %}
-#endif
 
 #ifdef XXX
 // FIXME: Why R0/R1/R2/R3?
@@ -12194,17 +9076,6 @@ instruct countLeadingZerosI(iRegI dst, iRegI src) %{
   ins_pipe(ialu_reg);
 %}
 
-#ifdef AARCH64
-instruct countLeadingZerosL(iRegI dst, iRegL src) %{
-  match(Set dst (CountLeadingZerosL src));
-  size(4);
-  format %{ "CLZ $dst,$src" %}
-  ins_encode %{
-    __ clz($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-#else
 instruct countLeadingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
   match(Set dst (CountLeadingZerosL src));
   effect(TEMP tmp, TEMP dst, KILL ccr);
@@ -12221,7 +9092,6 @@ instruct countLeadingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
   %}
   ins_pipe(ialu_reg);
 %}
-#endif
 
 instruct countTrailingZerosI(iRegI dst, iRegI src, iRegI tmp) %{
   match(Set dst (CountTrailingZerosI src));
@@ -12236,20 +9106,6 @@ instruct countTrailingZerosI(iRegI dst, iRegI src, iRegI tmp) %{
   ins_pipe(ialu_reg);
 %}
 
-#ifdef AARCH64
-instruct countTrailingZerosL(iRegI dst, iRegL src, iRegL tmp) %{
-  match(Set dst (CountTrailingZerosL src));
-  effect(TEMP tmp);
-  size(8);
-  format %{ "RBIT $tmp, $src\n\t"
-            "CLZ  $dst,$tmp" %}
-  ins_encode %{
-    __ rbit($tmp$$Register, $src$$Register);
-    __ clz($dst$$Register, $tmp$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-#else
 instruct countTrailingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
   match(Set dst (CountTrailingZerosL src));
   effect(TEMP tmp, TEMP dst, KILL ccr);
@@ -12270,37 +9126,10 @@ instruct countTrailingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{
   %}
   ins_pipe(ialu_reg);
 %}
-#endif
 
 
 //---------- Population Count Instructions -------------------------------------
 
-#ifdef AARCH64
-instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{
-  predicate(UsePopCountInstruction);
-  match(Set dst (PopCountI src));
-  effect(TEMP tmp);
-  size(20);
-
-  format %{ "MOV_W      $dst,$src\n\t"
-            "FMOV_dx    $tmp,$dst\n\t"
-            "VCNT       $tmp.8B,$tmp.8B\n\t"
-            "ADDV       $tmp.B,$tmp.8B\n\t"
-            "FMRS       $dst,$tmp" %}
-
-  ins_encode %{
-    __ mov_w($dst$$Register, $src$$Register);
-    __ fmov_dx($tmp$$FloatRegister, $dst$$Register);
-    int quad = 0;
-    int cnt_size = 0; // VELEM_SIZE_8
-    __ vcnt($tmp$$FloatRegister, $tmp$$FloatRegister, quad, cnt_size);
-    int add_size = 0; // VELEM_SIZE_8
-    __ addv($tmp$$FloatRegister, $tmp$$FloatRegister, quad, add_size);
-    __ fmrs($dst$$Register, $tmp$$FloatRegister);
-  %}
-  ins_pipe(ialu_reg); // FIXME
-%}
-#else
 instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountI src));
@@ -12322,32 +9151,7 @@ instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{
   %}
   ins_pipe(ialu_reg); // FIXME
 %}
-#endif
-
-#ifdef AARCH64
-instruct popCountL(iRegI dst, iRegL src, regD tmp) %{
-  predicate(UsePopCountInstruction);
-  match(Set dst (PopCountL src));
-  effect(TEMP tmp);
-  size(16);
-
-  format %{ "FMOV_dx    $tmp,$src\n\t"
-            "VCNT       $tmp.8B,$tmp.8B\n\t"
-            "ADDV       $tmp.B,$tmp.8B\n\t"
-            "FMOV_ws    $dst,$tmp" %}
 
-  ins_encode %{
-    __ fmov_dx($tmp$$FloatRegister, $src$$Register);
-    int quad = 0;
-    int cnt_size = 0;
-    __ vcnt($tmp$$FloatRegister, $tmp$$FloatRegister, quad, cnt_size);
-    int add_size = 0;
-    __ addv($tmp$$FloatRegister, $tmp$$FloatRegister, quad, add_size);
-    __ fmov_ws($dst$$Register, $tmp$$FloatRegister);
-  %}
-  ins_pipe(ialu_reg); // FIXME
-%}
-#else
 // Note: Long.bitCount(long) returns an int.
 instruct popCountL(iRegI dst, iRegL src, regD_low tmp) %{
   predicate(UsePopCountInstruction);
@@ -12373,7 +9177,6 @@ instruct popCountL(iRegI dst, iRegL src, regD_low tmp) %{
   %}
   ins_pipe(ialu_reg);
 %}
-#endif
 
 
 // ============================================================================
@@ -12385,26 +9188,13 @@ instruct bytes_reverse_int(iRegI dst, iRegI src) %{
   size(4);
   format %{ "REV32 $dst,$src" %}
   ins_encode %{
-#ifdef AARCH64
-    __ rev_w($dst$$Register, $src$$Register);
-    // high 32 bits zeroed, not sign extended
-#else
     __ rev($dst$$Register, $src$$Register);
-#endif
   %}
   ins_pipe( iload_mem ); // FIXME
 %}
 
 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
   match(Set dst (ReverseBytesL src));
-#ifdef AARCH64
-//size(4);
-  format %{ "REV $dst,$src"  %}
-  ins_encode %{
-    __ rev($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg); // FIXME
-#else
   effect(TEMP dst);
   size(8);
   format %{ "REV $dst.lo,$src.lo\n\t"
@@ -12419,14 +9209,6 @@ instruct bytes_reverse_long(iRegL dst, iRegL src) %{
 
 instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{
   match(Set dst (ReverseBytesUS src));
-#ifdef AARCH64
-  size(4);
-  format %{ "REV16_W $dst,$src" %}
-  ins_encode %{
-    __ rev16_w($dst$$Register, $src$$Register);
-    // high 32 bits zeroed
-  %}
-#else
   size(8);
   format %{ "REV32 $dst,$src\n\t"
             "LSR   $dst,$dst,#16" %}
@@ -12434,27 +9216,16 @@ instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{
     __ rev($dst$$Register, $src$$Register);
     __ mov($dst$$Register, AsmOperand($dst$$Register, lsr, 16));
   %}
-#endif
   ins_pipe( iload_mem ); // FIXME
 %}
 
 instruct bytes_reverse_short(iRegI dst, iRegI src) %{
   match(Set dst (ReverseBytesS src));
-#ifdef AARCH64
-  size(8);
-  format %{ "REV16_W $dst,$src\n\t"
-            "SIGN_EXT16 $dst" %}
-  ins_encode %{
-    __ rev16_w($dst$$Register, $src$$Register);
-    __ sign_extend($dst$$Register, $dst$$Register, 16);
-  %}
-#else
   size(4);
   format %{ "REVSH $dst,$src" %}
   ins_encode %{
     __ revsh($dst$$Register, $src$$Register);
   %}
-#endif
   ins_pipe( iload_mem ); // FIXME
 %}
 
@@ -12513,7 +9284,6 @@ instruct storeV16(memoryvld mem, vecX src) %{
   ins_pipe(fstoreD_mem_reg); // FIXME
 %}
 
-#ifndef AARCH64
 // Replicate scalar to packed byte values in Double register
 instruct Repl8B_reg(vecD dst, iRegI src, iRegI tmp) %{
   predicate(n->as_Vector()->length() == 8);
@@ -12535,7 +9305,6 @@ instruct Repl8B_reg(vecD dst, iRegI src, iRegI tmp) %{
   %}
   ins_pipe(ialu_reg); // FIXME
 %}
-#endif /* !AARCH64 */
 
 // Replicate scalar to packed byte values in Double register
 instruct Repl8B_reg_simd(vecD dst, iRegI src) %{
@@ -12567,7 +9336,6 @@ instruct Repl16B_reg(vecX dst, iRegI src) %{
   ins_pipe(ialu_reg); // FIXME
 %}
 
-#ifndef AARCH64
 // Replicate scalar constant to packed byte values in Double register
 instruct Repl8B_immI(vecD dst, immI src, iRegI tmp) %{
   predicate(n->as_Vector()->length() == 8);
@@ -12581,7 +9349,6 @@ instruct Repl8B_immI(vecD dst, immI src, iRegI tmp) %{
   ins_encode( LdReplImmI(src, dst, tmp, (4), (1)) );
   ins_pipe(loadConFD); // FIXME
 %}
-#endif /* !AARCH64 */
 
 // Replicate scalar constant to packed byte values in Double register
 // TODO: support negative constants with MVNI?
@@ -12614,7 +9381,6 @@ instruct Repl16B_immU8(vecX dst, immU8 src) %{
   ins_pipe(loadConFD); // FIXME
 %}
 
-#ifndef AARCH64
 // Replicate scalar to packed short/char values into Double register
 instruct Repl4S_reg(vecD dst, iRegI src, iRegI tmp) %{
   predicate(n->as_Vector()->length() == 4);
@@ -12634,7 +9400,6 @@ instruct Repl4S_reg(vecD dst, iRegI src, iRegI tmp) %{
   %}
   ins_pipe(ialu_reg); // FIXME
 %}
-#endif /* !AARCH64 */
 
 // Replicate scalar to packed byte values in Double register
 instruct Repl4S_reg_simd(vecD dst, iRegI src) %{
@@ -12667,7 +9432,6 @@ instruct Repl8S_reg(vecX dst, iRegI src) %{
 %}
 
 
-#ifndef AARCH64
 // Replicate scalar constant to packed short/char values in Double register
 instruct Repl4S_immI(vecD dst, immI src, iRegP tmp) %{
   predicate(n->as_Vector()->length() == 4);
@@ -12681,7 +9445,6 @@ instruct Repl4S_immI(vecD dst, immI src, iRegP tmp) %{
   ins_encode( LdReplImmI(src, dst, tmp, (2), (2)) );
   ins_pipe(loadConFD); // FIXME
 %}
-#endif /* !AARCH64 */
 
 // Replicate scalar constant to packed byte values in Double register
 instruct Repl4S_immU8(vecD dst, immU8 src) %{
@@ -12713,7 +9476,6 @@ instruct Repl8S_immU8(vecX dst, immU8 src) %{
   ins_pipe(loadConFD); // FIXME
 %}
 
-#ifndef AARCH64
 // Replicate scalar to packed int values in Double register
 instruct Repl2I_reg(vecD dst, iRegI src) %{
   predicate(n->as_Vector()->length() == 2);
@@ -12744,7 +9506,6 @@ instruct Repl4I_reg(vecX dst, iRegI src) %{
   %}
   ins_pipe(ialu_reg); // FIXME
 %}
-#endif /* !AARCH64 */
 
 // Replicate scalar to packed int values in Double register
 instruct Repl2I_reg_simd(vecD dst, iRegI src) %{
@@ -12777,7 +9538,6 @@ instruct Repl4I_reg_simd(vecX dst, iRegI src) %{
 %}
 
 
-#ifndef AARCH64
 // Replicate scalar zero constant to packed int values in Double register
 instruct Repl2I_immI(vecD dst, immI src, iRegI tmp) %{
   predicate(n->as_Vector()->length() == 2);
@@ -12791,7 +9551,6 @@ instruct Repl2I_immI(vecD dst, immI src, iRegI tmp) %{
   ins_encode( LdReplImmI(src, dst, tmp, (1), (4)) );
   ins_pipe(loadConFD); // FIXME
 %}
-#endif /* !AARCH64 */
 
 // Replicate scalar constant to packed byte values in Double register
 instruct Repl2I_immU8(vecD dst, immU8 src) %{
@@ -12823,23 +9582,6 @@ instruct Repl4I_immU8(vecX dst, immU8 src) %{
   ins_pipe(loadConFD); // FIXME
 %}
 
-#ifdef AARCH64
-// Replicate scalar to packed byte values in Double register pair
-instruct Repl2L_reg(vecX dst, iRegL src) %{
-  predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateL src));
-  size(4*1);
-  ins_cost(DEFAULT_COST*1); // FIXME
-
-  format %{ "VDUP.2D $dst.Q,$src\t" %}
-  ins_encode %{
-    bool quad = true;
-    __ vdupI($dst$$FloatRegister, $src$$Register,
-             MacroAssembler::VELEM_SIZE_64, quad);
-  %}
-  ins_pipe(ialu_reg); // FIXME
-%}
-#else /* !AARCH64 */
 // Replicate scalar to packed byte values in Double register pair
 instruct Repl2L_reg(vecX dst, iRegL src) %{
   predicate(n->as_Vector()->length() == 2);
@@ -12884,7 +9626,6 @@ instruct Repl2F_reg_vfp(vecD dst, regF src) %{
     Repl2F_regI(dst,tmp);
   %}
 %}
-#endif /* !AARCH64 */
 
 // Replicate scalar to packed float values in Double register
 instruct Repl2F_reg_simd(vecD dst, regF src) %{
@@ -12901,7 +9642,6 @@ instruct Repl2F_reg_simd(vecD dst, regF src) %{
   ins_pipe(ialu_reg); // FIXME
 %}
 
-#ifndef AARCH64
 // Replicate scalar to packed float values in Double register pair
 instruct Repl4F_reg(vecX dst, regF src, iRegI tmp) %{
   predicate(n->as_Vector()->length() == 4);
@@ -12921,7 +9661,6 @@ instruct Repl4F_reg(vecX dst, regF src, iRegI tmp) %{
   %}
   ins_pipe(ialu_reg); // FIXME
 %}
-#endif /* !AARCH64 */
 
 // Replicate scalar to packed float values in Double register pair
 instruct Repl4F_reg_simd(vecX dst, regF src) %{
@@ -12938,7 +9677,6 @@ instruct Repl4F_reg_simd(vecX dst, regF src) %{
   ins_pipe(ialu_reg); // FIXME
 %}
 
-#ifndef AARCH64
 // Replicate scalar zero constant to packed float values in Double register
 instruct Repl2F_immI(vecD dst, immF src, iRegI tmp) %{
   predicate(n->as_Vector()->length() == 2);
@@ -12952,22 +9690,9 @@ instruct Repl2F_immI(vecD dst, immF src, iRegI tmp) %{
   ins_encode( LdReplImmF(src, dst, tmp) );
   ins_pipe(loadConFD); // FIXME
 %}
-#endif /* !AAARCH64 */
 
 // Replicate scalar to packed double float values in Double register pair
 instruct Repl2D_reg(vecX dst, regD src) %{
-#ifdef AARCH64
-  predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd());
-  match(Set dst (ReplicateD src));
-  size(4*1);
-  ins_cost(DEFAULT_COST*1); // FIXME
-
-  format %{ "VDUP     $dst.2D,$src\t" %}
-  ins_encode %{
-    bool quad = true;
-    __ vdupD($dst$$FloatRegister, $src$$FloatRegister, quad);
-  %}
-#else
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateD src));
   size(4*2);
@@ -12982,7 +9707,6 @@ instruct Repl2D_reg(vecX dst, regD src) %{
     FloatRegister dstb = dsta->successor()->successor();
     __ fcpyd(dstb, src);
   %}
-#endif
   ins_pipe(ialu_reg); // FIXME
 %}
 
@@ -13099,7 +9823,6 @@ instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
   ins_pipe( faddD_reg_reg ); // FIXME
 %}
 
-#ifndef AARCH64
 instruct vadd2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
   predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant());
   match(Set dst (AddVF src1 src2));
@@ -13117,7 +9840,6 @@ instruct vadd2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
 
   ins_pipe(faddF_reg_reg); // FIXME
 %}
-#endif
 
 instruct vadd4F_reg_simd(vecX dst, vecX src1, vecX src2) %{
   predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant());
@@ -13132,20 +9854,6 @@ instruct vadd4F_reg_simd(vecX dst, vecX src1, vecX src2) %{
   ins_pipe( faddD_reg_reg ); // FIXME
 %}
 
-#ifdef AARCH64
-instruct vadd2D_reg_simd(vecX dst, vecX src1, vecX src2) %{
-  predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant());
-  match(Set dst (AddVD src1 src2));
-  size(4);
-  format %{ "VADD.F64 $dst.Q,$src1.Q,$src2.Q\t! add packed2D" %}
-  ins_encode %{
-    bool quad = true;
-    __ vaddF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
-             MacroAssembler::VFA_SIZE_F64, quad);
-  %}
-  ins_pipe( faddD_reg_reg ); // FIXME
-%}
-#else
 instruct vadd4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
   predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant());
   match(Set dst (AddVF src1 src2));
@@ -13201,7 +9909,6 @@ instruct vadd2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
 
   ins_pipe(faddF_reg_reg); // FIXME
 %}
-#endif
 
 
 // Bytes vector sub
@@ -13313,7 +10020,6 @@ instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
   ins_pipe( faddF_reg_reg ); // FIXME
 %}
 
-#ifndef AARCH64
 instruct vsub2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
   predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant());
   match(Set dst (SubVF src1 src2));
@@ -13336,7 +10042,6 @@ instruct vsub2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
 
   ins_pipe(faddF_reg_reg); // FIXME
 %}
-#endif
 
 
 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
@@ -13352,20 +10057,6 @@ instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
   ins_pipe( faddF_reg_reg ); // FIXME
 %}
 
-#ifdef AARCH64
-instruct vsub2D_reg_simd(vecX dst, vecX src1, vecX src2) %{
-  predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant());
-  match(Set dst (SubVD src1 src2));
-  size(4);
-  format %{ "VSUB.F64 $dst.Q,$src1.Q,$src2.Q\t! add packed2D" %}
-  ins_encode %{
-    bool quad = true;
-    __ vsubF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
-             MacroAssembler::VFA_SIZE_F64, quad);
-  %}
-  ins_pipe( faddD_reg_reg ); // FIXME
-%}
-#else
 instruct vsub4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
   predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant());
   match(Set dst (SubVF src1 src2));
@@ -13421,7 +10112,6 @@ instruct vsub2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
 
   ins_pipe(faddF_reg_reg); // FIXME
 %}
-#endif
 
 // Shorts/Chars vector mul
 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
@@ -13486,7 +10176,6 @@ instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
   ins_pipe( fmulF_reg_reg ); // FIXME
 %}
 
-#ifndef AARCH64
 instruct vmul2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
   predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant());
   match(Set dst (MulVF src1 src2));
@@ -13504,7 +10193,6 @@ instruct vmul2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
 
   ins_pipe(fmulF_reg_reg); // FIXME
 %}
-#endif
 
 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
   predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant());
@@ -13518,7 +10206,6 @@ instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
   ins_pipe( fmulF_reg_reg ); // FIXME
 %}
 
-#ifndef AARCH64
 instruct vmul4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
   predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant());
   match(Set dst (MulVF src1 src2));
@@ -13551,25 +10238,7 @@ instruct vmul4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
 
   ins_pipe(fmulF_reg_reg); // FIXME
 %}
-#endif
-
-#ifdef AARCH64
-instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd());
-  match(Set dst (MulVD src1 src2));
-  size(4*1);
-  ins_cost(DEFAULT_COST*1); // FIXME
 
-  format %{ "FMUL.2D $dst,$src1,$src2\t! double[2]" %}
-  ins_encode %{
-    int quad = 1;
-    __ vmulF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
-             MacroAssembler::VFA_SIZE_F64, quad);
-  %}
-
-  ins_pipe(fdivF_reg_reg); // FIXME
-%}
-#else
 instruct vmul2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (MulVD src1 src2));
@@ -13591,26 +10260,12 @@ instruct vmul2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
 
   ins_pipe(fmulD_reg_reg); // FIXME
 %}
-#endif
 
 
 // Floats vector div
 instruct vdiv2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (DivVF src1 src2));
-#ifdef AARCH64
-  size(4*1);
-  ins_cost(DEFAULT_COST*1); // FIXME
-
-  format %{ "FDIV.2S $dst,$src1,$src2\t! float[2]" %}
-  ins_encode %{
-    int quad = 0;
-    __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
-             MacroAssembler::VFA_SIZE_F32, quad);
-  %}
-
-  ins_pipe(fdivF_reg_reg); // FIXME
-#else
   size(4*2);
   ins_cost(DEFAULT_COST*2); // FIXME
 
@@ -13624,25 +10279,11 @@ instruct vdiv2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{
   %}
 
   ins_pipe(fdivF_reg_reg); // FIXME
-#endif
 %}
 
 instruct vdiv4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (DivVF src1 src2));
-#ifdef AARCH64
-  size(4*1);
-  ins_cost(DEFAULT_COST*1); // FIXME
-
-  format %{ "FDIV.4S $dst,$src1,$src2\t! float[4]" %}
-  ins_encode %{
-    int quad = 1;
-    __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
-             MacroAssembler::VFA_SIZE_F32, quad);
-  %}
-
-  ins_pipe(fdivF_reg_reg); // FIXME
-#else
   size(4*4);
   ins_cost(DEFAULT_COST*4); // FIXME
 
@@ -13671,26 +10312,8 @@ instruct vdiv4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{
   %}
 
   ins_pipe(fdivF_reg_reg); // FIXME
-#endif
 %}
 
-#ifdef AARCH64
-instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
-  predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd());
-  match(Set dst (DivVD src1 src2));
-  size(4*1);
-  ins_cost(DEFAULT_COST*1); // FIXME
-
-  format %{ "FDIV.2D $dst,$src1,$src2\t! double[2]" %}
-  ins_encode %{
-    int quad = 1;
-    __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
-             MacroAssembler::VFA_SIZE_F64, quad);
-  %}
-
-  ins_pipe(fdivF_reg_reg); // FIXME
-%}
-#else
 instruct vdiv2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (DivVD src1 src2));
@@ -13712,7 +10335,6 @@ instruct vdiv2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{
 
   ins_pipe(fdivD_reg_reg); // FIXME
 %}
-#endif
 
 // --------------------------------- NEG --------------------------------------
 
diff --git a/src/hotspot/cpu/arm/arm_64.ad b/src/hotspot/cpu/arm/arm_64.ad
deleted file mode 100644
index 5d5a6c871b3..00000000000
--- a/src/hotspot/cpu/arm/arm_64.ad
+++ /dev/null
@@ -1,998 +0,0 @@
-//
-// Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved.
-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-//
-// This code is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License version 2 only, as
-// published by the Free Software Foundation.
-//
-// This code is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-// version 2 for more details (a copy is included in the LICENSE file that
-// accompanied this code).
-//
-// You should have received a copy of the GNU General Public License version
-// 2 along with this work; if not, write to the Free Software Foundation,
-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-// or visit www.oracle.com if you need additional information or have any
-// questions.
-//
-
-// ARM Architecture Description File
-
-//----------REGISTER DEFINITION BLOCK------------------------------------------
-// This information is used by the matcher and the register allocator to
-// describe individual registers and classes of registers within the target
-// archtecture.
-register %{
-//----------Architecture Description Register Definitions----------------------
-// General Registers
-// "reg_def"  name ( register save type, C convention save type,
-//                   ideal register type, encoding, vm name );
-// Register Save Types:
-//
-// NS  = No-Save:       The register allocator assumes that these registers
-//                      can be used without saving upon entry to the method, &
-//                      that they do not need to be saved at call sites.
-//
-// SOC = Save-On-Call:  The register allocator assumes that these registers
-//                      can be used without saving upon entry to the method,
-//                      but that they must be saved at call sites.
-//
-// SOE = Save-On-Entry: The register allocator assumes that these registers
-//                      must be saved before using them upon entry to the
-//                      method, but they do not need to be saved at call
-//                      sites.
-//
-// AS  = Always-Save:   The register allocator assumes that these registers
-//                      must be saved before using them upon entry to the
-//                      method, & that they must be saved at call sites.
-//
-// Ideal Register Type is used to determine how to save & restore a
-// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
-// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
-// FIXME: above comment seems wrong.  Spill done through MachSpillCopyNode
-//
-// The encoding number is the actual bit-pattern placed into the opcodes.
-
-
-// ----------------------------
-// Integer/Long Registers
-// ----------------------------
-
-// TODO: would be nice to keep track of high-word state:
-// zeroRegI --> RegL
-// signedRegI --> RegL
-// junkRegI --> RegL
-// how to tell C2 to treak RegI as RegL, or RegL as RegI?
-reg_def R_R0  (SOC, SOC, Op_RegI,   0, R0->as_VMReg());
-reg_def R_R0x (SOC, SOC, Op_RegI, 255, R0->as_VMReg()->next());
-reg_def R_R1  (SOC, SOC, Op_RegI,   1, R1->as_VMReg());
-reg_def R_R1x (SOC, SOC, Op_RegI, 255, R1->as_VMReg()->next());
-reg_def R_R2  (SOC, SOC, Op_RegI,   2, R2->as_VMReg());
-reg_def R_R2x (SOC, SOC, Op_RegI, 255, R2->as_VMReg()->next());
-reg_def R_R3  (SOC, SOC, Op_RegI,   3, R3->as_VMReg());
-reg_def R_R3x (SOC, SOC, Op_RegI, 255, R3->as_VMReg()->next());
-reg_def R_R4  (SOC, SOC, Op_RegI,   4, R4->as_VMReg());
-reg_def R_R4x (SOC, SOC, Op_RegI, 255, R4->as_VMReg()->next());
-reg_def R_R5  (SOC, SOC, Op_RegI,   5, R5->as_VMReg());
-reg_def R_R5x (SOC, SOC, Op_RegI, 255, R5->as_VMReg()->next());
-reg_def R_R6  (SOC, SOC, Op_RegI,   6, R6->as_VMReg());
-reg_def R_R6x (SOC, SOC, Op_RegI, 255, R6->as_VMReg()->next());
-reg_def R_R7  (SOC, SOC, Op_RegI,   7, R7->as_VMReg());
-reg_def R_R7x (SOC, SOC, Op_RegI, 255, R7->as_VMReg()->next());
-
-reg_def R_R8  (SOC, SOC, Op_RegI,   8, R8->as_VMReg());
-reg_def R_R8x (SOC, SOC, Op_RegI, 255, R8->as_VMReg()->next());
-reg_def R_R9  (SOC, SOC, Op_RegI,   9, R9->as_VMReg());
-reg_def R_R9x (SOC, SOC, Op_RegI, 255, R9->as_VMReg()->next());
-reg_def R_R10 (SOC, SOC, Op_RegI,  10, R10->as_VMReg());
-reg_def R_R10x(SOC, SOC, Op_RegI, 255, R10->as_VMReg()->next());
-reg_def R_R11 (SOC, SOC, Op_RegI,  11, R11->as_VMReg());
-reg_def R_R11x(SOC, SOC, Op_RegI, 255, R11->as_VMReg()->next());
-reg_def R_R12 (SOC, SOC, Op_RegI,  12, R12->as_VMReg());
-reg_def R_R12x(SOC, SOC, Op_RegI, 255, R12->as_VMReg()->next());
-reg_def R_R13 (SOC, SOC, Op_RegI,  13, R13->as_VMReg());
-reg_def R_R13x(SOC, SOC, Op_RegI, 255, R13->as_VMReg()->next());
-reg_def R_R14 (SOC, SOC, Op_RegI,  14, R14->as_VMReg());
-reg_def R_R14x(SOC, SOC, Op_RegI, 255, R14->as_VMReg()->next());
-reg_def R_R15 (SOC, SOC, Op_RegI,  15, R15->as_VMReg());
-reg_def R_R15x(SOC, SOC, Op_RegI, 255, R15->as_VMReg()->next());
-
-reg_def R_R16 (SOC, SOC, Op_RegI,  16, R16->as_VMReg()); // IP0
-reg_def R_R16x(SOC, SOC, Op_RegI, 255, R16->as_VMReg()->next());
-reg_def R_R17 (SOC, SOC, Op_RegI,  17, R17->as_VMReg()); // IP1
-reg_def R_R17x(SOC, SOC, Op_RegI, 255, R17->as_VMReg()->next());
-reg_def R_R18 (SOC, SOC, Op_RegI,  18, R18->as_VMReg()); // Platform Register
-reg_def R_R18x(SOC, SOC, Op_RegI, 255, R18->as_VMReg()->next());
-
-reg_def R_R19 (SOC, SOE, Op_RegI,  19, R19->as_VMReg());
-reg_def R_R19x(SOC, SOE, Op_RegI, 255, R19->as_VMReg()->next());
-reg_def R_R20 (SOC, SOE, Op_RegI,  20, R20->as_VMReg());
-reg_def R_R20x(SOC, SOE, Op_RegI, 255, R20->as_VMReg()->next());
-reg_def R_R21 (SOC, SOE, Op_RegI,  21, R21->as_VMReg());
-reg_def R_R21x(SOC, SOE, Op_RegI, 255, R21->as_VMReg()->next());
-reg_def R_R22 (SOC, SOE, Op_RegI,  22, R22->as_VMReg());
-reg_def R_R22x(SOC, SOE, Op_RegI, 255, R22->as_VMReg()->next());
-reg_def R_R23 (SOC, SOE, Op_RegI,  23, R23->as_VMReg());
-reg_def R_R23x(SOC, SOE, Op_RegI, 255, R23->as_VMReg()->next());
-reg_def R_R24 (SOC, SOE, Op_RegI,  24, R24->as_VMReg());
-reg_def R_R24x(SOC, SOE, Op_RegI, 255, R24->as_VMReg()->next());
-reg_def R_R25 (SOC, SOE, Op_RegI,  25, R25->as_VMReg());
-reg_def R_R25x(SOC, SOE, Op_RegI, 255, R25->as_VMReg()->next());
-reg_def R_R26 (SOC, SOE, Op_RegI,  26, R26->as_VMReg());
-reg_def R_R26x(SOC, SOE, Op_RegI, 255, R26->as_VMReg()->next());
-reg_def R_R27 (SOC, SOE, Op_RegI,  27, R27->as_VMReg());         // Rheap_base
-reg_def R_R27x(SOC, SOE, Op_RegI, 255, R27->as_VMReg()->next()); // Rheap_base
-reg_def R_R28 ( NS, SOE, Op_RegI,  28, R28->as_VMReg());         // TLS
-reg_def R_R28x( NS, SOE, Op_RegI, 255, R28->as_VMReg()->next()); // TLS
-
-reg_def R_R29 ( NS, SOE, Op_RegI,  29, R29->as_VMReg());         // FP
-reg_def R_R29x( NS, SOE, Op_RegI, 255, R29->as_VMReg()->next()); // FP
-reg_def R_R30 (SOC, SOC, Op_RegI,  30, R30->as_VMReg());         // LR
-reg_def R_R30x(SOC, SOC, Op_RegI, 255, R30->as_VMReg()->next()); // LR
-
-reg_def R_ZR ( NS,  NS, Op_RegI,  31, ZR->as_VMReg());  // ZR
-reg_def R_ZRx( NS,  NS, Op_RegI, 255, ZR->as_VMReg()->next()); // ZR
-
-// FIXME
-//reg_def R_SP ( NS,  NS, Op_RegP,  32, SP->as_VMReg());
-reg_def R_SP ( NS,  NS, Op_RegI,  32, SP->as_VMReg());
-//reg_def R_SPx( NS, NS, Op_RegP, 255, SP->as_VMReg()->next());
-reg_def R_SPx( NS,  NS, Op_RegI, 255, SP->as_VMReg()->next());
-
-// ----------------------------
-// Float/Double/Vector Registers
-// ----------------------------
-
-reg_def  R_V0(SOC, SOC, Op_RegF,  0,  V0->as_VMReg());
-reg_def  R_V1(SOC, SOC, Op_RegF,  1,  V1->as_VMReg());
-reg_def  R_V2(SOC, SOC, Op_RegF,  2,  V2->as_VMReg());
-reg_def  R_V3(SOC, SOC, Op_RegF,  3,  V3->as_VMReg());
-reg_def  R_V4(SOC, SOC, Op_RegF,  4,  V4->as_VMReg());
-reg_def  R_V5(SOC, SOC, Op_RegF,  5,  V5->as_VMReg());
-reg_def  R_V6(SOC, SOC, Op_RegF,  6,  V6->as_VMReg());
-reg_def  R_V7(SOC, SOC, Op_RegF,  7,  V7->as_VMReg());
-reg_def  R_V8(SOC, SOC, Op_RegF,  8,  V8->as_VMReg());
-reg_def  R_V9(SOC, SOC, Op_RegF,  9,  V9->as_VMReg());
-reg_def R_V10(SOC, SOC, Op_RegF, 10, V10->as_VMReg());
-reg_def R_V11(SOC, SOC, Op_RegF, 11, V11->as_VMReg());
-reg_def R_V12(SOC, SOC, Op_RegF, 12, V12->as_VMReg());
-reg_def R_V13(SOC, SOC, Op_RegF, 13, V13->as_VMReg());
-reg_def R_V14(SOC, SOC, Op_RegF, 14, V14->as_VMReg());
-reg_def R_V15(SOC, SOC, Op_RegF, 15, V15->as_VMReg());
-reg_def R_V16(SOC, SOC, Op_RegF, 16, V16->as_VMReg());
-reg_def R_V17(SOC, SOC, Op_RegF, 17, V17->as_VMReg());
-reg_def R_V18(SOC, SOC, Op_RegF, 18, V18->as_VMReg());
-reg_def R_V19(SOC, SOC, Op_RegF, 19, V19->as_VMReg());
-reg_def R_V20(SOC, SOC, Op_RegF, 20, V20->as_VMReg());
-reg_def R_V21(SOC, SOC, Op_RegF, 21, V21->as_VMReg());
-reg_def R_V22(SOC, SOC, Op_RegF, 22, V22->as_VMReg());
-reg_def R_V23(SOC, SOC, Op_RegF, 23, V23->as_VMReg());
-reg_def R_V24(SOC, SOC, Op_RegF, 24, V24->as_VMReg());
-reg_def R_V25(SOC, SOC, Op_RegF, 25, V25->as_VMReg());
-reg_def R_V26(SOC, SOC, Op_RegF, 26, V26->as_VMReg());
-reg_def R_V27(SOC, SOC, Op_RegF, 27, V27->as_VMReg());
-reg_def R_V28(SOC, SOC, Op_RegF, 28, V28->as_VMReg());
-reg_def R_V29(SOC, SOC, Op_RegF, 29, V29->as_VMReg());
-reg_def R_V30(SOC, SOC, Op_RegF, 30, V30->as_VMReg());
-reg_def R_V31(SOC, SOC, Op_RegF, 31, V31->as_VMReg());
-
-reg_def  R_V0b(SOC, SOC, Op_RegF, 255, V0->as_VMReg()->next(1));
-reg_def  R_V1b(SOC, SOC, Op_RegF, 255, V1->as_VMReg()->next(1));
-reg_def  R_V2b(SOC, SOC, Op_RegF, 255, V2->as_VMReg()->next(1));
-reg_def  R_V3b(SOC, SOC, Op_RegF,  3,  V3->as_VMReg()->next(1));
-reg_def  R_V4b(SOC, SOC, Op_RegF,  4,  V4->as_VMReg()->next(1));
-reg_def  R_V5b(SOC, SOC, Op_RegF,  5,  V5->as_VMReg()->next(1));
-reg_def  R_V6b(SOC, SOC, Op_RegF,  6,  V6->as_VMReg()->next(1));
-reg_def  R_V7b(SOC, SOC, Op_RegF,  7,  V7->as_VMReg()->next(1));
-reg_def  R_V8b(SOC, SOC, Op_RegF, 255, V8->as_VMReg()->next(1));
-reg_def  R_V9b(SOC, SOC, Op_RegF,  9,  V9->as_VMReg()->next(1));
-reg_def R_V10b(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(1));
-reg_def R_V11b(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(1));
-reg_def R_V12b(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(1));
-reg_def R_V13b(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(1));
-reg_def R_V14b(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(1));
-reg_def R_V15b(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(1));
-reg_def R_V16b(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(1));
-reg_def R_V17b(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(1));
-reg_def R_V18b(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(1));
-reg_def R_V19b(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(1));
-reg_def R_V20b(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(1));
-reg_def R_V21b(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(1));
-reg_def R_V22b(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(1));
-reg_def R_V23b(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(1));
-reg_def R_V24b(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(1));
-reg_def R_V25b(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(1));
-reg_def R_V26b(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(1));
-reg_def R_V27b(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(1));
-reg_def R_V28b(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(1));
-reg_def R_V29b(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(1));
-reg_def R_V30b(SOC, SOC, Op_RegD, 30, V30->as_VMReg()->next(1));
-reg_def R_V31b(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(1));
-
-reg_def  R_V0c(SOC, SOC, Op_RegF,  0,  V0->as_VMReg()->next(2));
-reg_def  R_V1c(SOC, SOC, Op_RegF,  1,  V1->as_VMReg()->next(2));
-reg_def  R_V2c(SOC, SOC, Op_RegF,  2,  V2->as_VMReg()->next(2));
-reg_def  R_V3c(SOC, SOC, Op_RegF,  3,  V3->as_VMReg()->next(2));
-reg_def  R_V4c(SOC, SOC, Op_RegF,  4,  V4->as_VMReg()->next(2));
-reg_def  R_V5c(SOC, SOC, Op_RegF,  5,  V5->as_VMReg()->next(2));
-reg_def  R_V6c(SOC, SOC, Op_RegF,  6,  V6->as_VMReg()->next(2));
-reg_def  R_V7c(SOC, SOC, Op_RegF,  7,  V7->as_VMReg()->next(2));
-reg_def  R_V8c(SOC, SOC, Op_RegF,  8,  V8->as_VMReg()->next(2));
-reg_def  R_V9c(SOC, SOC, Op_RegF,  9,  V9->as_VMReg()->next(2));
-reg_def R_V10c(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(2));
-reg_def R_V11c(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(2));
-reg_def R_V12c(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(2));
-reg_def R_V13c(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(2));
-reg_def R_V14c(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(2));
-reg_def R_V15c(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(2));
-reg_def R_V16c(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(2));
-reg_def R_V17c(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(2));
-reg_def R_V18c(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(2));
-reg_def R_V19c(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(2));
-reg_def R_V20c(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(2));
-reg_def R_V21c(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(2));
-reg_def R_V22c(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(2));
-reg_def R_V23c(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(2));
-reg_def R_V24c(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(2));
-reg_def R_V25c(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(2));
-reg_def R_V26c(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(2));
-reg_def R_V27c(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(2));
-reg_def R_V28c(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(2));
-reg_def R_V29c(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(2));
-reg_def R_V30c(SOC, SOC, Op_RegF, 30, V30->as_VMReg()->next(2));
-reg_def R_V31c(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(2));
-
-reg_def  R_V0d(SOC, SOC, Op_RegF,  0,  V0->as_VMReg()->next(3));
-reg_def  R_V1d(SOC, SOC, Op_RegF,  1,  V1->as_VMReg()->next(3));
-reg_def  R_V2d(SOC, SOC, Op_RegF,  2,  V2->as_VMReg()->next(3));
-reg_def  R_V3d(SOC, SOC, Op_RegF,  3,  V3->as_VMReg()->next(3));
-reg_def  R_V4d(SOC, SOC, Op_RegF,  4,  V4->as_VMReg()->next(3));
-reg_def  R_V5d(SOC, SOC, Op_RegF,  5,  V5->as_VMReg()->next(3));
-reg_def  R_V6d(SOC, SOC, Op_RegF,  6,  V6->as_VMReg()->next(3));
-reg_def  R_V7d(SOC, SOC, Op_RegF,  7,  V7->as_VMReg()->next(3));
-reg_def  R_V8d(SOC, SOC, Op_RegF,  8,  V8->as_VMReg()->next(3));
-reg_def  R_V9d(SOC, SOC, Op_RegF,  9,  V9->as_VMReg()->next(3));
-reg_def R_V10d(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(3));
-reg_def R_V11d(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(3));
-reg_def R_V12d(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(3));
-reg_def R_V13d(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(3));
-reg_def R_V14d(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(3));
-reg_def R_V15d(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(3));
-reg_def R_V16d(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(3));
-reg_def R_V17d(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(3));
-reg_def R_V18d(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(3));
-reg_def R_V19d(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(3));
-reg_def R_V20d(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(3));
-reg_def R_V21d(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(3));
-reg_def R_V22d(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(3));
-reg_def R_V23d(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(3));
-reg_def R_V24d(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(3));
-reg_def R_V25d(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(3));
-reg_def R_V26d(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(3));
-reg_def R_V27d(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(3));
-reg_def R_V28d(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(3));
-reg_def R_V29d(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(3));
-reg_def R_V30d(SOC, SOC, Op_RegF, 30, V30->as_VMReg()->next(3));
-reg_def R_V31d(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(3));
-
-// ----------------------------
-// Special Registers
-// Condition Codes Flag Registers
-reg_def APSR (SOC, SOC,  Op_RegFlags, 255, VMRegImpl::Bad());
-reg_def FPSCR(SOC, SOC,  Op_RegFlags, 255, VMRegImpl::Bad());
-
-// ----------------------------
-// Specify the enum values for the registers.  These enums are only used by the
-// OptoReg "class". We can convert these enum values at will to VMReg when needed
-// for visibility to the rest of the vm. The order of this enum influences the
-// register allocator so having the freedom to set this order and not be stuck
-// with the order that is natural for the rest of the vm is worth it.
-
-// Quad vector must be aligned here, so list them first.
-alloc_class fprs(
-    R_V8,  R_V8b,  R_V8c,  R_V8d,  R_V9,  R_V9b,  R_V9c,  R_V9d,
-    R_V10, R_V10b, R_V10c, R_V10d, R_V11, R_V11b, R_V11c, R_V11d,
-    R_V12, R_V12b, R_V12c, R_V12d, R_V13, R_V13b, R_V13c, R_V13d,
-    R_V14, R_V14b, R_V14c, R_V14d, R_V15, R_V15b, R_V15c, R_V15d,
-    R_V16, R_V16b, R_V16c, R_V16d, R_V17, R_V17b, R_V17c, R_V17d,
-    R_V18, R_V18b, R_V18c, R_V18d, R_V19, R_V19b, R_V19c, R_V19d,
-    R_V20, R_V20b, R_V20c, R_V20d, R_V21, R_V21b, R_V21c, R_V21d,
-    R_V22, R_V22b, R_V22c, R_V22d, R_V23, R_V23b, R_V23c, R_V23d,
-    R_V24, R_V24b, R_V24c, R_V24d, R_V25, R_V25b, R_V25c, R_V25d,
-    R_V26, R_V26b, R_V26c, R_V26d, R_V27, R_V27b, R_V27c, R_V27d,
-    R_V28, R_V28b, R_V28c, R_V28d, R_V29, R_V29b, R_V29c, R_V29d,
-    R_V30, R_V30b, R_V30c, R_V30d, R_V31, R_V31b, R_V31c, R_V31d,
-    R_V0,  R_V0b,  R_V0c,  R_V0d,  R_V1,  R_V1b,  R_V1c,  R_V1d,
-    R_V2,  R_V2b,  R_V2c,  R_V2d,  R_V3,  R_V3b,  R_V3c,  R_V3d,
-    R_V4,  R_V4b,  R_V4c,  R_V4d,  R_V5,  R_V5b,  R_V5c,  R_V5d,
-    R_V6,  R_V6b,  R_V6c,  R_V6d,  R_V7,  R_V7b,  R_V7c,  R_V7d
-);
-
-// Need double-register alignment here.
-// We are already quad-register aligned because of vectors above.
-alloc_class gprs(
-    R_R0,  R_R0x,  R_R1,  R_R1x,  R_R2,  R_R2x,  R_R3,  R_R3x,
-    R_R4,  R_R4x,  R_R5,  R_R5x,  R_R6,  R_R6x,  R_R7,  R_R7x,
-    R_R8,  R_R8x,  R_R9,  R_R9x,  R_R10, R_R10x, R_R11, R_R11x,
-    R_R12, R_R12x, R_R13, R_R13x, R_R14, R_R14x, R_R15, R_R15x,
-    R_R16, R_R16x, R_R17, R_R17x, R_R18, R_R18x, R_R19, R_R19x,
-    R_R20, R_R20x, R_R21, R_R21x, R_R22, R_R22x, R_R23, R_R23x,
-    R_R24, R_R24x, R_R25, R_R25x, R_R26, R_R26x, R_R27, R_R27x,
-    R_R28, R_R28x, R_R29, R_R29x, R_R30, R_R30x
-);
-// Continuing with double-reigister alignment...
-alloc_class chunk2(APSR, FPSCR);
-alloc_class chunk3(R_SP, R_SPx);
-alloc_class chunk4(R_ZR, R_ZRx);
-
-//----------Architecture Description Register Classes--------------------------
-// Several register classes are automatically defined based upon information in
-// this architecture description.
-// 1) reg_class inline_cache_reg           ( as defined in frame section )
-// 2) reg_class interpreter_method_oop_reg ( as defined in frame section )
-// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
-//
-
-// ----------------------------
-// Integer Register Classes
-// ----------------------------
-reg_class int_reg_all(R_R0,  R_R1,  R_R2,  R_R3,  R_R4,  R_R5,  R_R6,  R_R7,
-                      R_R8,  R_R9,  R_R10, R_R11, R_R12, R_R13, R_R14, R_R15,
-                      R_R16, R_R17, R_R18, R_R19, R_R20, R_R21, R_R22, R_R23,
-                      R_R24, R_R25, R_R26, R_R27, R_R28, R_R29, R_R30
-);
-
-// Exclusions from i_reg:
-// SP (R31)
-// Rthread/R28: reserved by HotSpot to the TLS register (invariant within Java)
-reg_class int_reg %{
-    return _INT_REG_mask;
-%}
-reg_class ptr_reg %{
-    return _PTR_REG_mask;
-%}
-reg_class vectorx_reg %{
-    return _VECTORX_REG_mask;
-%}
-
-reg_class R0_regI(R_R0);
-reg_class R1_regI(R_R1);
-reg_class R2_regI(R_R2);
-reg_class R3_regI(R_R3);
-//reg_class R12_regI(R_R12);
-
-// ----------------------------
-// Pointer Register Classes
-// ----------------------------
-
-// Special class for storeP instructions, which can store SP or RPC to TLS.
-// It is also used for memory addressing, allowing direct TLS addressing.
-
-reg_class sp_ptr_reg %{
-    return _SP_PTR_REG_mask;
-%}
-
-reg_class store_reg %{
-    return _STR_REG_mask;
-%}
-
-reg_class store_ptr_reg %{
-    return _STR_PTR_REG_mask;
-%}
-
-reg_class spillP_reg %{
-    return _SPILLP_REG_mask;
-%}
-
-// Other special pointer regs
-reg_class R0_regP(R_R0, R_R0x);
-reg_class R1_regP(R_R1, R_R1x);
-reg_class R2_regP(R_R2, R_R2x);
-reg_class Rexception_regP(R_R19, R_R19x);
-reg_class Ricklass_regP(R_R8, R_R8x);
-reg_class Rmethod_regP(R_R27, R_R27x);
-
-reg_class Rthread_regP(R_R28, R_R28x);
-reg_class IP_regP(R_R16, R_R16x);
-#define RtempRegP IPRegP
-reg_class LR_regP(R_R30, R_R30x);
-
-reg_class SP_regP(R_SP,  R_SPx);
-reg_class FP_regP(R_R29, R_R29x);
-
-reg_class ZR_regP(R_ZR, R_ZRx);
-reg_class ZR_regI(R_ZR);
-
-// ----------------------------
-// Long Register Classes
-// ----------------------------
-reg_class long_reg %{ return _PTR_REG_mask; %}
-// for ldrexd, strexd: first reg of pair must be even
-reg_class long_reg_align %{ return LONG_REG_mask(); %}
-
-reg_class R0_regL(R_R0,R_R0x); // arg 1 or return value
-
-// ----------------------------
-// Special Class for Condition Code Flags Register
-reg_class int_flags(APSR);
-reg_class float_flags(FPSCR);
-
-
-// ----------------------------
-// Float Point Register Classes
-// ----------------------------
-reg_class sflt_reg_0(
-  R_V0,  R_V1,  R_V2,  R_V3,  R_V4,  R_V5,  R_V6,  R_V7,
-  R_V8,  R_V9,  R_V10, R_V11, R_V12, R_V13, R_V14, R_V15,
-  R_V16, R_V17, R_V18, R_V19, R_V20, R_V21, R_V22, R_V23,
-  R_V24, R_V25, R_V26, R_V27, R_V28, R_V29, R_V30, R_V31);
-
-reg_class sflt_reg %{
-    return _SFLT_REG_mask;
-%}
-
-reg_class dflt_low_reg %{
-    return _DFLT_REG_mask;
-%}
-
-reg_class actual_dflt_reg %{
-    return _DFLT_REG_mask;
-%}
-
-reg_class vectorx_reg_0(
-  R_V0,  R_V1,  R_V2,  R_V3,  R_V4,  R_V5, R_V6, R_V7,
-  R_V8,  R_V9,  R_V10, R_V11, R_V12, R_V13, R_V14, R_V15,
-  R_V16, R_V17, R_V18, R_V19, R_V20, R_V21, R_V22, R_V23,
-  R_V24, R_V25, R_V26, R_V27, R_V28, R_V29, R_V30, /*R_V31,*/
-  R_V0b,  R_V1b,  R_V2b,  R_V3b,  R_V4b,  R_V5b,  R_V6b,  R_V7b,
-  R_V8b,  R_V9b,  R_V10b, R_V11b, R_V12b, R_V13b, R_V14b, R_V15b,
-  R_V16b, R_V17b, R_V18b, R_V19b, R_V20b, R_V21b, R_V22b, R_V23b,
-  R_V24b, R_V25b, R_V26b, R_V27b, R_V28b, R_V29b, R_V30b, /*R_V31b,*/
-  R_V0c,  R_V1c,  R_V2c,  R_V3c,  R_V4c,  R_V5c,  R_V6c,  R_V7c,
-  R_V8c,  R_V9c,  R_V10c, R_V11c, R_V12c, R_V13c, R_V14c, R_V15c,
-  R_V16c, R_V17c, R_V18c, R_V19c, R_V20c, R_V21c, R_V22c, R_V23c,
-  R_V24c, R_V25c, R_V26c, R_V27c, R_V28c, R_V29c, R_V30c, /*R_V31c,*/
-  R_V0d,  R_V1d,  R_V2d,  R_V3d,  R_V4d,  R_V5d,  R_V6d,  R_V7d,
-  R_V8d,  R_V9d,  R_V10d, R_V11d, R_V12d, R_V13d, R_V14d, R_V15d,
-  R_V16d, R_V17d, R_V18d, R_V19d, R_V20d, R_V21d, R_V22d, R_V23d,
-  R_V24d, R_V25d, R_V26d, R_V27d, R_V28d, R_V29d, R_V30d, /*R_V31d*/);
-
-reg_class Rmemcopy_reg %{
-    return _RMEMCOPY_REG_mask;
-%}
-
-%}
-
-source_hpp %{
-
-const MachRegisterNumbers R_mem_copy_lo_num = R_V31_num;
-const MachRegisterNumbers R_mem_copy_hi_num = R_V31b_num;
-const FloatRegister Rmemcopy = V31;
-
-const MachRegisterNumbers R_hf_ret_lo_num = R_V0_num;
-const MachRegisterNumbers R_hf_ret_hi_num = R_V0b_num;
-const FloatRegister Rhfret = V0;
-
-extern OptoReg::Name R_Ricklass_num;
-extern OptoReg::Name R_Rmethod_num;
-extern OptoReg::Name R_tls_num;
-extern OptoReg::Name R_Rheap_base_num;
-
-extern RegMask _INT_REG_mask;
-extern RegMask _PTR_REG_mask;
-extern RegMask _SFLT_REG_mask;
-extern RegMask _DFLT_REG_mask;
-extern RegMask _VECTORX_REG_mask;
-extern RegMask _RMEMCOPY_REG_mask;
-extern RegMask _SP_PTR_REG_mask;
-extern RegMask _SPILLP_REG_mask;
-extern RegMask _STR_REG_mask;
-extern RegMask _STR_PTR_REG_mask;
-
-#define LDR_DOUBLE "LDR_D"
-#define LDR_FLOAT  "LDR_S"
-#define STR_DOUBLE "STR_D"
-#define STR_FLOAT  "STR_S"
-#define STR_64     "STR"
-#define LDR_64     "LDR"
-#define STR_32     "STR_W"
-#define LDR_32     "LDR_W"
-#define MOV_DOUBLE "FMOV_D"
-#define MOV_FLOAT  "FMOV_S"
-#define FMSR       "FMOV_SW"
-#define FMRS       "FMOV_WS"
-#define LDREX      "ldxr  "
-#define STREX      "stxr  "
-
-#define str_64     str
-#define ldr_64     ldr
-#define ldr_32     ldr_w
-#define ldrex      ldxr
-#define strex      stxr
-
-#define fmsr       fmov_sw
-#define fmrs       fmov_ws
-#define fconsts    fmov_s
-#define fconstd    fmov_d
-
-static inline bool is_uimm12(jlong imm, int shift) {
-  return Assembler::is_unsigned_imm_in_range(imm, 12, shift);
-}
-
-static inline bool is_memoryD(int offset) {
-  int scale = 3; // LogBytesPerDouble
-  return is_uimm12(offset, scale);
-}
-
-static inline bool is_memoryfp(int offset) {
-  int scale = LogBytesPerInt; // include 32-bit word accesses
-  return is_uimm12(offset, scale);
-}
-
-static inline bool is_memoryI(int offset) {
-  int scale = LogBytesPerInt;
-  return is_uimm12(offset, scale);
-}
-
-static inline bool is_memoryP(int offset) {
-  int scale = LogBytesPerWord;
-  return is_uimm12(offset, scale);
-}
-
-static inline bool is_memoryHD(int offset) {
-  int scale = LogBytesPerInt; // include 32-bit word accesses
-  return is_uimm12(offset, scale);
-}
-
-uintx limmL_low(uintx imm, int n);
-
-static inline bool Xis_aimm(int imm) {
-  return Assembler::ArithmeticImmediate(imm).is_encoded();
-}
-
-static inline bool is_aimm(intptr_t imm) {
-  return Assembler::ArithmeticImmediate(imm).is_encoded();
-}
-
-static inline bool is_limmL(uintptr_t imm) {
-  return Assembler::LogicalImmediate(imm).is_encoded();
-}
-
-static inline bool is_limmL_low(intptr_t imm, int n) {
-  return is_limmL(limmL_low(imm, n));
-}
-
-static inline bool is_limmI(jint imm) {
-  return Assembler::LogicalImmediate(imm, true).is_encoded();
-}
-
-static inline uintx limmI_low(jint imm, int n) {
-  return limmL_low(imm, n);
-}
-
-static inline bool is_limmI_low(jint imm, int n) {
-  return is_limmL_low(imm, n);
-}
-
-%}
-
-source %{
-
-// Given a register encoding, produce a Integer Register object
-static Register reg_to_register_object(int register_encoding) {
-  assert(R0->encoding() == R_R0_enc && R30->encoding() == R_R30_enc, "right coding");
-  assert(Rthread->encoding() == R_R28_enc, "right coding");
-  assert(SP->encoding() == R_SP_enc, "right coding");
-  return as_Register(register_encoding);
-}
-
-// Given a register encoding, produce a single-precision Float Register object
-static FloatRegister reg_to_FloatRegister_object(int register_encoding) {
-  assert(V0->encoding() == R_V0_enc && V31->encoding() == R_V31_enc, "right coding");
-  return as_FloatRegister(register_encoding);
-}
-
-RegMask _INT_REG_mask;
-RegMask _PTR_REG_mask;
-RegMask _SFLT_REG_mask;
-RegMask _DFLT_REG_mask;
-RegMask _VECTORX_REG_mask;
-RegMask _RMEMCOPY_REG_mask;
-RegMask _SP_PTR_REG_mask;
-RegMask _SPILLP_REG_mask;
-RegMask _STR_REG_mask;
-RegMask _STR_PTR_REG_mask;
-
-OptoReg::Name R_Ricklass_num = -1;
-OptoReg::Name R_Rmethod_num  = -1;
-OptoReg::Name R_tls_num      = -1;
-OptoReg::Name R_Rtemp_num    = -1;
-OptoReg::Name R_Rheap_base_num = -1;
-
-static int mov_oop_size = -1;
-
-#ifdef ASSERT
-static bool same_mask(const RegMask &a, const RegMask &b) {
-    RegMask a_sub_b = a; a_sub_b.SUBTRACT(b);
-    RegMask b_sub_a = b; b_sub_a.SUBTRACT(a);
-    return a_sub_b.Size() == 0 && b_sub_a.Size() == 0;
-}
-#endif
-
-void Compile::pd_compiler2_init() {
-
-    R_Ricklass_num = OptoReg::as_OptoReg(Ricklass->as_VMReg());
-    R_Rmethod_num  = OptoReg::as_OptoReg(Rmethod->as_VMReg());
-    R_tls_num      = OptoReg::as_OptoReg(Rthread->as_VMReg());
-    R_Rtemp_num    = OptoReg::as_OptoReg(Rtemp->as_VMReg());
-    R_Rheap_base_num = OptoReg::as_OptoReg(Rheap_base->as_VMReg());
-
-    _INT_REG_mask = _INT_REG_ALL_mask;
-    _INT_REG_mask.Remove(R_tls_num);
-    _INT_REG_mask.Remove(R_SP_num);
-    if (UseCompressedOops) {
-      _INT_REG_mask.Remove(R_Rheap_base_num);
-    }
-    // Remove Rtemp because safepoint poll can trash it
-    // (see SharedRuntime::generate_handler_blob)
-    _INT_REG_mask.Remove(R_Rtemp_num);
-
-    _PTR_REG_mask = _INT_REG_mask;
-    _PTR_REG_mask.smear_to_sets(2);
-
-    // STR_REG    = INT_REG+ZR
-    // SPILLP_REG = INT_REG+SP
-    // SP_PTR_REG = INT_REG+SP+TLS
-    _STR_REG_mask = _INT_REG_mask;
-    _SP_PTR_REG_mask = _STR_REG_mask;
-    _STR_REG_mask.Insert(R_ZR_num);
-    _SP_PTR_REG_mask.Insert(R_SP_num);
-    _SPILLP_REG_mask = _SP_PTR_REG_mask;
-    _SP_PTR_REG_mask.Insert(R_tls_num);
-    _STR_PTR_REG_mask = _STR_REG_mask;
-    _STR_PTR_REG_mask.smear_to_sets(2);
-    _SP_PTR_REG_mask.smear_to_sets(2);
-    _SPILLP_REG_mask.smear_to_sets(2);
-
-    _RMEMCOPY_REG_mask = RegMask(R_mem_copy_lo_num);
-assert(OptoReg::as_OptoReg(Rmemcopy->as_VMReg()) == R_mem_copy_lo_num, "!");
-
-    _SFLT_REG_mask = _SFLT_REG_0_mask;
-    _SFLT_REG_mask.SUBTRACT(_RMEMCOPY_REG_mask);
-    _DFLT_REG_mask = _SFLT_REG_mask;
-    _DFLT_REG_mask.smear_to_sets(2);
-    _VECTORX_REG_mask = _SFLT_REG_mask;
-    _VECTORX_REG_mask.smear_to_sets(4);
-    assert(same_mask(_VECTORX_REG_mask, _VECTORX_REG_0_mask), "!");
-
-#ifdef ASSERT
-    RegMask r((RegMask *)&SFLT_REG_mask());
-    r.smear_to_sets(2);
-    assert(same_mask(r, _DFLT_REG_mask), "!");
-#endif
-
-    if (VM_Version::prefer_moves_over_load_literal()) {
-      mov_oop_size = 4;
-    } else {
-      mov_oop_size = 1;
-    }
-
-    assert(Matcher::interpreter_method_oop_reg_encode() == Rmethod->encoding(), "should be");
-}
-
-uintx limmL_low(uintx imm, int n) {
-  // 1: try as is
-  if (is_limmL(imm)) {
-    return imm;
-  }
-  // 2: try low bits + all 0's
-  uintx imm0 = imm & right_n_bits(n);
-  if (is_limmL(imm0)) {
-    return imm0;
-  }
-  // 3: try low bits + all 1's
-  uintx imm1 = imm0 | left_n_bits(BitsPerWord - n);
-  if (is_limmL(imm1)) {
-    return imm1;
-  }
-#if 0
-  // 4: try low bits replicated
-  int field = 1 << log2_intptr(n + n - 1);
-  assert(field >= n, "!");
-  assert(field / n == 1, "!");
-  intptr_t immr = immx;
-  while (field < BitsPerWord) {
-    intrptr_t bits = immr & right_n_bits(field);
-    immr = bits | (bits << field);
-    field = field << 1;
-  }
-  // replicate at power-of-2 boundary
-  if (is_limmL(immr)) {
-    return immr;
-  }
-#endif
-  return imm;
-}
-
-// Convert the raw encoding form into the form expected by the
-// constructor for Address.
-Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
-  RelocationHolder rspec;
-  if (disp_reloc != relocInfo::none) {
-    rspec = Relocation::spec_simple(disp_reloc);
-  }
-
-  Register rbase = (base == 0xff) ? SP : as_Register(base);
-  if (index != 0xff) {
-    Register rindex = as_Register(index);
-    if (disp == 0x7fffffff) { // special value to indicate sign-extend
-      Address madr(rbase, rindex, ex_sxtw, scale);
-      madr._rspec = rspec;
-      return madr;
-    } else {
-      assert(disp == 0, "unsupported");
-      Address madr(rbase, rindex, ex_lsl, scale);
-      madr._rspec = rspec;
-      return madr;
-    }
-  } else {
-    assert(scale == 0, "not supported");
-    Address madr(rbase, disp);
-    madr._rspec = rspec;
-    return madr;
-  }
-}
-
-// Location of compiled Java return values.  Same as C
-OptoRegPair c2::return_value(int ideal_reg) {
-  assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
-  static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num,     R_R0_num,  R_hf_ret_lo_num,  R_hf_ret_lo_num, R_R0_num };
-  static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, R_R0x_num, OptoReg::Bad,     R_hf_ret_hi_num, R_R0x_num };
-  return OptoRegPair( hi[ideal_reg], lo[ideal_reg]);
-}
-
-// !!!!! Special hack to get all type of calls to specify the byte offset
-//       from the start of the call to the point where the return address
-//       will point.
-
-int MachCallStaticJavaNode::ret_addr_offset() {
-  bool far = (_method == NULL) ? maybe_far_call(this) : !cache_reachable();
-  bool patchable = _method != NULL;
-  int call_size = MacroAssembler::call_size(entry_point(), far, patchable);
-  return (call_size + (_method_handle_invoke ? 1 : 0)) * NativeInstruction::instruction_size;
-}
-
-int MachCallDynamicJavaNode::ret_addr_offset() {
-  bool far = !cache_reachable();
-  int call_size = MacroAssembler::call_size(entry_point(), far, true);
-  return (mov_oop_size + call_size) * NativeInstruction::instruction_size; 
-}
-
-int MachCallRuntimeNode::ret_addr_offset() {
-  int call_size = 0;
-  // TODO: check if Leaf nodes also need this
-  if (!is_MachCallLeaf()) {
-    // adr $temp, ret_addr
-    // str $temp, [SP + last_java_pc]
-    call_size += 2;
-  }
-  // bl or mov_slow; blr
-  bool far = maybe_far_call(this);
-  call_size += MacroAssembler::call_size(entry_point(), far, false);
-  return call_size * NativeInstruction::instruction_size;
-}
-
-%}
-
-// The intptr_t operand types, defined by textual substitution.
-// (Cf. opto/type.hpp.  This lets us avoid many, many other ifdefs.)
-#define immX      immL
-#define iRegX     iRegL
-#define aimmX     aimmL
-#define limmX     limmL
-#define immX9     immL9
-#define LShiftX   LShiftL
-#define shimmX    immU6
-
-#define store_RegLd store_RegL
-
-//----------ATTRIBUTES---------------------------------------------------------
-//----------Operand Attributes-------------------------------------------------
-op_attrib op_cost(1);          // Required cost attribute
-
-//----------OPERANDS-----------------------------------------------------------
-// Operand definitions must precede instruction definitions for correct parsing
-// in the ADLC because operands constitute user defined types which are used in
-// instruction definitions.
-
-//----------Simple Operands----------------------------------------------------
-// Immediate Operands
-
-// Integer Immediate: 9-bit (including sign bit), so same as immI8?
-// FIXME: simm9 allows -256, but immI8 doesn't...
-operand simm9() %{
-  predicate(Assembler::is_imm_in_range(n->get_int(), 9, 0));
-  match(ConI);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-
-operand uimm12() %{
-  predicate(Assembler::is_unsigned_imm_in_range(n->get_int(), 12, 0));
-  match(ConI);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand aimmP() %{
-  predicate(n->get_ptr() == 0 || (is_aimm(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none));
-  match(ConP);
-
-  op_cost(0);
-  // formats are generated automatically for constants and base registers
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Long Immediate: 12-bit - for addressing mode
-operand immL12() %{
-  predicate((-4096 < n->get_long()) && (n->get_long() < 4096));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Long Immediate: 9-bit - for addressing mode
-operand immL9() %{
-  predicate((-256 <= n->get_long()) && (n->get_long() < 256));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immIMov() %{
-  predicate(n->get_int() >> 16 == 0);
-  match(ConI);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immLMov() %{
-  predicate(n->get_long() >> 16 == 0);
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immUL12() %{
-  predicate(is_uimm12(n->get_long(), 0));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immUL12x2() %{
-  predicate(is_uimm12(n->get_long(), 1));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immUL12x4() %{
-  predicate(is_uimm12(n->get_long(), 2));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immUL12x8() %{
-  predicate(is_uimm12(n->get_long(), 3));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immUL12x16() %{
-  predicate(is_uimm12(n->get_long(), 4));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Used for long shift
-operand immU6() %{
-  predicate(0 <= n->get_int() && (n->get_int() <= 63));
-  match(ConI);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Used for register extended shift
-operand immI_0_4() %{
-  predicate(0 <= n->get_int() && (n->get_int() <= 4));
-  match(ConI);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Compressed Pointer Register
-operand iRegN() %{
-  constraint(ALLOC_IN_RC(int_reg));
-  match(RegN);
-  match(ZRRegN);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand SPRegP() %{
-  constraint(ALLOC_IN_RC(SP_regP));
-  match(RegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand ZRRegP() %{
-  constraint(ALLOC_IN_RC(ZR_regP));
-  match(RegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand ZRRegL() %{
-  constraint(ALLOC_IN_RC(ZR_regP));
-  match(RegL);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand ZRRegI() %{
-  constraint(ALLOC_IN_RC(ZR_regI));
-  match(RegI);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand ZRRegN() %{
-  constraint(ALLOC_IN_RC(ZR_regI));
-  match(RegN);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
diff --git a/src/hotspot/cpu/arm/assembler_arm.hpp b/src/hotspot/cpu/arm/assembler_arm.hpp
index 694660f431d..4e0bd822e74 100644
--- a/src/hotspot/cpu/arm/assembler_arm.hpp
+++ b/src/hotspot/cpu/arm/assembler_arm.hpp
@@ -40,29 +40,14 @@ enum AsmShift {
   lsl, lsr, asr, ror
 };
 
-#ifdef AARCH64
-enum AsmExtendOp {
-  ex_uxtb, ex_uxth, ex_uxtw, ex_uxtx,
-  ex_sxtb, ex_sxth, ex_sxtw, ex_sxtx,
-
-  ex_lsl = ex_uxtx
-};
-#endif
 
 enum AsmOffset {
-#ifdef AARCH64
-  basic_offset = 0b00,
-  pre_indexed  = 0b11,
-  post_indexed = 0b01
-#else
   basic_offset = 1 << 24,
   pre_indexed  = 1 << 24 | 1 << 21,
   post_indexed = 0
-#endif
 };
 
 
-#ifndef AARCH64
 enum AsmWriteback {
   no_writeback,
   writeback
@@ -72,7 +57,6 @@ enum AsmOffsetOp {
   sub_offset = 0,
   add_offset = 1
 };
-#endif
 
 
 // ARM Addressing Modes 2 and 3 - Load and store
@@ -84,21 +68,13 @@ class Address {
   AsmOffset _mode;
   RelocationHolder   _rspec;
   int       _shift_imm;
-#ifdef AARCH64
-  AsmExtendOp _extend;
-#else
   AsmShift  _shift;
   AsmOffsetOp _offset_op;
 
   static inline int abs(int x) { return x < 0 ? -x : x; }
   static inline int up (int x) { return x < 0 ?  0 : 1; }
-#endif
 
-#ifdef AARCH64
-  static const AsmExtendOp LSL = ex_lsl;
-#else
   static const AsmShift LSL = lsl;
-#endif
 
  public:
   Address() : _base(noreg) {}
@@ -109,12 +85,8 @@ class Address {
     _disp = offset;
     _mode = mode;
     _shift_imm = 0;
-#ifdef AARCH64
-    _extend = ex_lsl;
-#else
     _shift = lsl;
     _offset_op = add_offset;
-#endif
   }
 
 #ifdef ASSERT
@@ -124,27 +96,11 @@ class Address {
     _disp = in_bytes(offset);
     _mode = mode;
     _shift_imm = 0;
-#ifdef AARCH64
-    _extend = ex_lsl;
-#else
     _shift = lsl;
     _offset_op = add_offset;
-#endif
   }
 #endif
 
-#ifdef AARCH64
-  Address(Register rn, Register rm, AsmExtendOp extend = ex_lsl, int shift_imm = 0) {
-    assert ((extend == ex_uxtw) || (extend == ex_lsl) || (extend == ex_sxtw) || (extend == ex_sxtx), "invalid extend for address mode");
-    assert ((0 <= shift_imm) && (shift_imm <= 4), "shift amount is out of range");
-    _base = rn;
-    _index = rm;
-    _disp = 0;
-    _mode = basic_offset;
-    _extend = extend;
-    _shift_imm = shift_imm;
-  }
-#else
   Address(Register rn, Register rm, AsmShift shift = lsl,
           int shift_imm = 0, AsmOffset mode = basic_offset,
           AsmOffsetOp offset_op = add_offset) {
@@ -181,7 +137,6 @@ class Address {
     _mode = basic_offset;
     _offset_op = add_offset;
   }
-#endif // AARCH64
 
   // [base + index * wordSize]
   static Address indexed_ptr(Register base, Register index) {
@@ -211,25 +166,6 @@ class Address {
     return a;
   }
 
-#ifdef AARCH64
-  int encoding_simd() const {
-    assert(_index != SP, "encoding constraint");
-    assert(_disp == 0 || _mode == post_indexed,  "encoding constraint");
-    assert(_index == noreg || _mode == basic_offset, "encoding constraint");
-    assert(_mode == basic_offset || _mode == post_indexed, "encoding constraint");
-    assert(_extend == ex_lsl, "encoding constraint");
-    int index;
-    if (_index == noreg) {
-      if (_mode == post_indexed)
-        index = 0b100 << 5 | 31;
-      else
-        index = 0;
-    } else {
-      index = 0b100 << 5 | _index->encoding();
-    }
-    return index << 16 | _base->encoding_with_sp() << 5;
-  }
-#else /* !AARCH64 */
   int encoding2() const {
     assert(_mode == basic_offset || _base != PC, "unpredictable instruction");
     if (_index == noreg) {
@@ -287,7 +223,6 @@ class Address {
 
     return _base->encoding() << 16 | index;
   }
-#endif // !AARCH64
 
   Register base() const {
     return _base;
@@ -309,11 +244,6 @@ class Address {
     return _shift_imm;
   }
 
-#ifdef AARCH64
-  AsmExtendOp extend() const {
-    return _extend;
-  }
-#else
   AsmShift shift() const {
     return _shift;
   }
@@ -321,7 +251,6 @@ class Address {
   AsmOffsetOp offset_op() const {
     return _offset_op;
   }
-#endif
 
   bool uses(Register reg) const { return _base == reg || _index == reg; }
 
@@ -394,11 +323,7 @@ class VFP {
 };
 #endif
 
-#ifdef AARCH64
-#include "assembler_arm_64.hpp"
-#else
 #include "assembler_arm_32.hpp"
-#endif
 
 
 #endif // CPU_ARM_VM_ASSEMBLER_ARM_HPP
diff --git a/src/hotspot/cpu/arm/assembler_arm_64.cpp b/src/hotspot/cpu/arm/assembler_arm_64.cpp
deleted file mode 100644
index 1e06cca452d..00000000000
--- a/src/hotspot/cpu/arm/assembler_arm_64.cpp
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
-#include "ci/ciEnv.hpp"
-#include "gc/shared/cardTableBarrierSet.hpp"
-#include "gc/shared/collectedHeap.inline.hpp"
-#include "interpreter/interpreter.hpp"
-#include "interpreter/interpreterRuntime.hpp"
-#include "interpreter/templateInterpreterGenerator.hpp"
-#include "memory/resourceArea.hpp"
-#include "prims/jvm_misc.hpp"
-#include "prims/methodHandles.hpp"
-#include "runtime/biasedLocking.hpp"
-#include "runtime/interfaceSupport.inline.hpp"
-#include "runtime/objectMonitor.hpp"
-#include "runtime/os.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "runtime/stubRoutines.hpp"
-#include "utilities/hashtable.hpp"
-#include "utilities/macros.hpp"
-
-// Returns whether given imm has equal bit fields <0:size-1> and <size:2*size-1>.
-inline bool Assembler::LogicalImmediate::has_equal_subpatterns(uintx imm, int size) {
-  uintx mask = right_n_bits(size);
-  uintx subpattern1 = mask_bits(imm, mask);
-  uintx subpattern2 = mask_bits(imm >> size, mask);
-  return subpattern1 == subpattern2;
-}
-
-// Returns least size that is a power of two from 2 to 64 with the proviso that given
-// imm is composed of repeating patterns of this size.
-inline int Assembler::LogicalImmediate::least_pattern_size(uintx imm) {
-  int size = BitsPerWord;
-  while (size > 2 && has_equal_subpatterns(imm, size >> 1)) {
-    size >>= 1;
-  }
-  return size;
-}
-
-// Returns count of set bits in given imm. Based on variable-precision SWAR algorithm.
-inline int Assembler::LogicalImmediate::population_count(uintx x) {
-  x -= ((x >> 1) & 0x5555555555555555L);
-  x = (((x >> 2) & 0x3333333333333333L) + (x & 0x3333333333333333L));
-  x = (((x >> 4) + x) & 0x0f0f0f0f0f0f0f0fL);
-  x += (x >> 8);
-  x += (x >> 16);
-  x += (x >> 32);
-  return(x & 0x7f);
-}
-
-// Let given x be <A:B> where B = 0 and least bit of A = 1. Returns <A:C>, where C is B-size set bits.
-inline uintx Assembler::LogicalImmediate::set_least_zeroes(uintx x) {
-  return x | (x - 1);
-}
-
-
-#ifdef ASSERT
-
-// Restores immediate by encoded bit masks.
-uintx Assembler::LogicalImmediate::decode() {
-  assert (_encoded, "should be");
-
-  int len_code = (_immN << 6) | ((~_imms) & 0x3f);
-  assert (len_code != 0, "should be");
-
-  int len = 6;
-  while (!is_set_nth_bit(len_code, len)) len--;
-  int esize = 1 << len;
-  assert (len > 0, "should be");
-  assert ((_is32bit ? 32 : 64) >= esize, "should be");
-
-  int levels = right_n_bits(len);
-  int S = _imms & levels;
-  int R = _immr & levels;
-
-  assert (S != levels, "should be");
-
-  uintx welem = right_n_bits(S + 1);
-  uintx wmask = (R == 0) ? welem : ((welem >> R) | (welem << (esize - R)));
-
-  for (int size = esize; size < 64; size <<= 1) {
-    wmask |= (wmask << size);
-  }
-
-  return wmask;
-}
-
-#endif
-
-
-// Constructs LogicalImmediate by given imm. Figures out if given imm can be used in AArch64 logical
-// instructions (AND, ANDS, EOR, ORR) and saves its encoding.
-void Assembler::LogicalImmediate::construct(uintx imm, bool is32) {
-  _is32bit = is32;
-
-  if (is32) {
-    assert(((imm >> 32) == 0) || (((intx)imm >> 31) == -1), "32-bit immediate is out of range");
-
-    // Replicate low 32 bits.
-    imm &= 0xffffffff;
-    imm |= imm << 32;
-  }
-
-  // All-zeroes and all-ones can not be encoded.
-  if (imm != 0 && (~imm != 0)) {
-
-    // Let LPS (least pattern size) be the least size (power of two from 2 to 64) of repeating
-    // patterns in the immediate. If immediate value can be encoded, it is encoded by pattern
-    // of exactly LPS size (due to structure of valid patterns). In order to verify
-    // that immediate value can be encoded, LPS is calculated and <LPS-1:0> bits of immediate
-    // are verified to be valid pattern.
-    int lps = least_pattern_size(imm);
-    uintx lps_mask = right_n_bits(lps);
-
-    // A valid pattern has one of the following forms:
-    //  | 0 x A | 1 x B | 0 x C |, where B > 0 and C > 0, or
-    //  | 1 x A | 0 x B | 1 x C |, where B > 0 and C > 0.
-    // For simplicity, the second form of the pattern is inverted into the first form.
-    bool inverted = imm & 0x1;
-    uintx pattern = (inverted ? ~imm : imm) & lps_mask;
-
-    //  | 0 x A | 1 x (B + C)   |
-    uintx without_least_zeroes = set_least_zeroes(pattern);
-
-    // Pattern is valid iff without least zeroes it is a power of two - 1.
-    if ((without_least_zeroes & (without_least_zeroes + 1)) == 0) {
-
-      // Count B as population count of pattern.
-      int bits_count = population_count(pattern);
-
-      // Count B+C as population count of pattern without least zeroes
-      int left_range = population_count(without_least_zeroes);
-
-      // S-prefix is a part of imms field which encodes LPS.
-      //  LPS  |  S prefix
-      //   64  |     not defined
-      //   32  |     0b0
-      //   16  |     0b10
-      //    8  |     0b110
-      //    4  |     0b1110
-      //    2  |     0b11110
-      int s_prefix = (lps == 64) ? 0 : ~set_least_zeroes(lps) & 0x3f;
-
-      // immN bit is set iff LPS == 64.
-      _immN = (lps == 64) ? 1 : 0;
-      assert (!is32 || (_immN == 0), "32-bit immediate should be encoded with zero N-bit");
-
-      // immr is the rotation size.
-      _immr = lps + (inverted ? 0 : bits_count) - left_range;
-
-      // imms is the field that encodes bits count and S-prefix.
-      _imms = ((inverted ? (lps - bits_count) : bits_count) - 1) | s_prefix;
-
-      _encoded = true;
-      assert (decode() == imm, "illegal encoding");
-
-      return;
-    }
-  }
-
-  _encoded = false;
-}
diff --git a/src/hotspot/cpu/arm/assembler_arm_64.hpp b/src/hotspot/cpu/arm/assembler_arm_64.hpp
deleted file mode 100644
index 51ca96248a3..00000000000
--- a/src/hotspot/cpu/arm/assembler_arm_64.hpp
+++ /dev/null
@@ -1,1718 +0,0 @@
-/*
- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef CPU_ARM_VM_ASSEMBLER_ARM_64_HPP
-#define CPU_ARM_VM_ASSEMBLER_ARM_64_HPP
-
-enum AsmShift12 {
-  lsl0, lsl12
-};
-
-enum AsmPrefetchOp {
-    pldl1keep = 0b00000,
-    pldl1strm,
-    pldl2keep,
-    pldl2strm,
-    pldl3keep,
-    pldl3strm,
-
-    plil1keep = 0b01000,
-    plil1strm,
-    plil2keep,
-    plil2strm,
-    plil3keep,
-    plil3strm,
-
-    pstl1keep = 0b10000,
-    pstl1strm,
-    pstl2keep,
-    pstl2strm,
-    pstl3keep,
-    pstl3strm,
-};
-
-// Shifted register operand for data processing instructions.
-class AsmOperand {
- private:
-  Register _reg;
-  AsmShift _shift;
-  int _shift_imm;
-
- public:
-  AsmOperand(Register reg) {
-    assert(reg != SP, "SP is not allowed in shifted register operand");
-    _reg = reg;
-    _shift = lsl;
-    _shift_imm = 0;
-  }
-
-  AsmOperand(Register reg, AsmShift shift, int shift_imm) {
-    assert(reg != SP, "SP is not allowed in shifted register operand");
-    assert(shift_imm >= 0, "shift amount should be non-negative");
-    _reg = reg;
-    _shift = shift;
-    _shift_imm = shift_imm;
-  }
-
-  Register reg() const {
-    return _reg;
-  }
-
-  AsmShift shift() const {
-    return _shift;
-  }
-
-  int shift_imm() const {
-    return _shift_imm;
-  }
-};
-
-
-class Assembler : public AbstractAssembler  {
-
- public:
-
-  static const int LogInstructionSize = 2;
-  static const int InstructionSize    = 1 << LogInstructionSize;
-
-  Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
-
-  static inline AsmCondition inverse(AsmCondition cond) {
-    assert ((cond != al) && (cond != nv), "AL and NV conditions cannot be inversed");
-    return (AsmCondition)((int)cond ^ 1);
-  }
-
-  // Returns value of nzcv flags conforming to the given condition.
-  static inline int flags_for_condition(AsmCondition cond) {
-    switch(cond) {            // NZCV
-      case mi: case lt: return 0b1000;
-      case eq: case le: return 0b0100;
-      case hs: case hi: return 0b0010;
-      case vs:          return 0b0001;
-      default:          return 0b0000;
-    }
-  }
-
-  // Immediate, encoded into logical instructions.
-  class LogicalImmediate {
-   private:
-    bool _encoded;
-    bool _is32bit;
-    int _immN;
-    int _immr;
-    int _imms;
-
-    static inline bool has_equal_subpatterns(uintx imm, int size);
-    static inline int least_pattern_size(uintx imm);
-    static inline int population_count(uintx x);
-    static inline uintx set_least_zeroes(uintx x);
-
-#ifdef ASSERT
-    uintx decode();
-#endif
-
-    void construct(uintx imm, bool is32);
-
-   public:
-    LogicalImmediate(uintx imm, bool is32 = false) { construct(imm, is32); }
-
-    // Returns true if given immediate can be used in AArch64 logical instruction.
-    bool is_encoded() const { return _encoded; }
-
-    bool is32bit() const { return _is32bit; }
-    int immN() const { assert(_encoded, "should be"); return _immN; }
-    int immr() const { assert(_encoded, "should be"); return _immr; }
-    int imms() const { assert(_encoded, "should be"); return _imms; }
-  };
-
-  // Immediate, encoded into arithmetic add/sub instructions.
-  class ArithmeticImmediate {
-   private:
-    bool _encoded;
-    int _imm;
-    AsmShift12 _shift;
-
-   public:
-    ArithmeticImmediate(intx x) {
-      if (is_unsigned_imm_in_range(x, 12, 0)) {
-        _encoded = true;
-        _imm = x;
-        _shift = lsl0;
-      } else if (is_unsigned_imm_in_range(x, 12, 12)) {
-        _encoded = true;
-        _imm = x >> 12;
-        _shift = lsl12;
-      } else {
-        _encoded = false;
-      }
-    }
-
-    ArithmeticImmediate(intx x, AsmShift12 sh) {
-      if (is_unsigned_imm_in_range(x, 12, 0)) {
-        _encoded = true;
-        _imm = x;
-        _shift = sh;
-      } else {
-        _encoded = false;
-      }
-    }
-
-    // Returns true if this immediate can be used in AArch64 arithmetic (add/sub/cmp/cmn) instructions.
-    bool is_encoded() const  { return _encoded; }
-
-    int imm() const          { assert(_encoded, "should be"); return _imm; }
-    AsmShift12 shift() const { assert(_encoded, "should be"); return _shift; }
-  };
-
-  static inline bool is_imm_in_range(intx value, int bits, int align_bits) {
-    intx sign_bits = (value >> (bits + align_bits - 1));
-    return ((value & right_n_bits(align_bits)) == 0) && ((sign_bits == 0) || (sign_bits == -1));
-  }
-
-  static inline int encode_imm(intx value, int bits, int align_bits, int low_bit_in_encoding) {
-    assert (is_imm_in_range(value, bits, align_bits), "immediate value is out of range");
-    return ((value >> align_bits) & right_n_bits(bits)) << low_bit_in_encoding;
-  }
-
-  static inline bool is_unsigned_imm_in_range(intx value, int bits, int align_bits) {
-    return (value >= 0) && ((value & right_n_bits(align_bits)) == 0) && ((value >> (align_bits + bits)) == 0);
-  }
-
-  static inline int encode_unsigned_imm(intx value, int bits, int align_bits, int low_bit_in_encoding) {
-    assert (is_unsigned_imm_in_range(value, bits, align_bits), "immediate value is out of range");
-    return (value >> align_bits) << low_bit_in_encoding;
-  }
-
-  static inline bool is_offset_in_range(intx offset, int bits) {
-    assert (bits == 14 || bits == 19 || bits == 26, "wrong bits number");
-    return is_imm_in_range(offset, bits, 2);
-  }
-
-  static inline int encode_offset(intx offset, int bits, int low_bit_in_encoding) {
-    return encode_imm(offset, bits, 2, low_bit_in_encoding);
-  }
-
-  // Returns true if given value can be used as immediate in arithmetic (add/sub/cmp/cmn) instructions.
-  static inline bool is_arith_imm_in_range(intx value) {
-    return ArithmeticImmediate(value).is_encoded();
-  }
-
-
-  // Load/store instructions
-
-#define F(mnemonic, opc) \
-  void mnemonic(Register rd, address literal_addr) {                                                       \
-    intx offset = literal_addr - pc();                                                                     \
-    assert (opc != 0b01 || offset == 0 || ((uintx)literal_addr & 7) == 0, "ldr target should be aligned"); \
-    assert (is_offset_in_range(offset, 19), "offset is out of range");                                     \
-    emit_int32(opc << 30 | 0b011 << 27 | encode_offset(offset, 19, 5) | rd->encoding_with_zr());           \
-  }
-
-  F(ldr_w, 0b00)
-  F(ldr,   0b01)
-  F(ldrsw, 0b10)
-#undef F
-
-#define F(mnemonic, opc) \
-  void mnemonic(FloatRegister rt, address literal_addr) {                                                  \
-    intx offset = literal_addr - pc();                                                                     \
-    assert (offset == 0 || ((uintx)literal_addr & right_n_bits(2 + opc)) == 0, "ldr target should be aligned"); \
-    assert (is_offset_in_range(offset, 19), "offset is out of range");                                     \
-    emit_int32(opc << 30 | 0b011100 << 24 | encode_offset(offset, 19, 5) | rt->encoding());                \
-  }
-
-  F(ldr_s, 0b00)
-  F(ldr_d, 0b01)
-  F(ldr_q, 0b10)
-#undef F
-
-#define F(mnemonic, size, o2, L, o1, o0) \
-  void mnemonic(Register rt, Register rn) {                                                                \
-    emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | 0b11111 << 16 |               \
-        o0 << 15 | 0b11111 << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr());                  \
-  }
-
-  F(ldxrb,   0b00, 0, 1, 0, 0)
-  F(ldaxrb,  0b00, 0, 1, 0, 1)
-  F(ldarb,   0b00, 1, 1, 0, 1)
-  F(ldxrh,   0b01, 0, 1, 0, 0)
-  F(ldaxrh,  0b01, 0, 1, 0, 1)
-  F(ldarh,   0b01, 1, 1, 0, 1)
-  F(ldxr_w,  0b10, 0, 1, 0, 0)
-  F(ldaxr_w, 0b10, 0, 1, 0, 1)
-  F(ldar_w,  0b10, 1, 1, 0, 1)
-  F(ldxr,    0b11, 0, 1, 0, 0)
-  F(ldaxr,   0b11, 0, 1, 0, 1)
-  F(ldar,    0b11, 1, 1, 0, 1)
-
-  F(stlrb,   0b00, 1, 0, 0, 1)
-  F(stlrh,   0b01, 1, 0, 0, 1)
-  F(stlr_w,  0b10, 1, 0, 0, 1)
-  F(stlr,    0b11, 1, 0, 0, 1)
-#undef F
-
-#define F(mnemonic, size, o2, L, o1, o0) \
-  void mnemonic(Register rs, Register rt, Register rn) {                                                     \
-    assert (rs != rt, "should be different");                                                                \
-    assert (rs != rn, "should be different");                                                                \
-    emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | rs->encoding_with_zr() << 16 |  \
-        o0 << 15 | 0b11111 << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr());                    \
-  }
-
-  F(stxrb,   0b00, 0, 0, 0, 0)
-  F(stlxrb,  0b00, 0, 0, 0, 1)
-  F(stxrh,   0b01, 0, 0, 0, 0)
-  F(stlxrh,  0b01, 0, 0, 0, 1)
-  F(stxr_w,  0b10, 0, 0, 0, 0)
-  F(stlxr_w, 0b10, 0, 0, 0, 1)
-  F(stxr,    0b11, 0, 0, 0, 0)
-  F(stlxr,   0b11, 0, 0, 0, 1)
-#undef F
-
-#define F(mnemonic, size, o2, L, o1, o0) \
-  void mnemonic(Register rt, Register rt2, Register rn) {                                                  \
-    assert (rt != rt2, "should be different");                                                             \
-    emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | 0b11111 << 16 |               \
-        o0 << 15 | rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr());  \
-  }
-
-  F(ldxp_w,  0b10, 0, 1, 1, 0)
-  F(ldaxp_w, 0b10, 0, 1, 1, 1)
-  F(ldxp,    0b11, 0, 1, 1, 0)
-  F(ldaxp,   0b11, 0, 1, 1, 1)
-#undef F
-
-#define F(mnemonic, size, o2, L, o1, o0) \
-  void mnemonic(Register rs, Register rt, Register rt2, Register rn) {                                       \
-    assert (rs != rt, "should be different");                                                                \
-    assert (rs != rt2, "should be different");                                                               \
-    assert (rs != rn, "should be different");                                                                \
-    emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | rs->encoding_with_zr() << 16 |  \
-        o0 << 15 | rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr());    \
-  }
-
-  F(stxp_w,  0b10, 0, 0, 1, 0)
-  F(stlxp_w, 0b10, 0, 0, 1, 1)
-  F(stxp,    0b11, 0, 0, 1, 0)
-  F(stlxp,   0b11, 0, 0, 1, 1)
-#undef F
-
-#define F(mnemonic, opc, V, L) \
-  void mnemonic(Register rt, Register rt2, Register rn, int offset = 0) {                                  \
-    assert (!L || rt != rt2, "should be different");                                                       \
-    int align_bits = 2 + (opc >> 1);                                                                       \
-    assert (is_imm_in_range(offset, 7, align_bits), "offset is out of range");                             \
-    emit_int32(opc << 30 | 0b101 << 27 | V << 26 | L << 22 | encode_imm(offset, 7, align_bits, 15) |       \
-        rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr());             \
-  }
-
-  F(stnp_w,  0b00, 0, 0)
-  F(ldnp_w,  0b00, 0, 1)
-  F(stnp,    0b10, 0, 0)
-  F(ldnp,    0b10, 0, 1)
-#undef F
-
-#define F(mnemonic, opc, V, L) \
-  void mnemonic(FloatRegister rt, FloatRegister rt2, Register rn, int offset = 0) {                        \
-    assert (!L || (rt != rt2), "should be different");                                                     \
-    int align_bits = 2 + opc;                                                                              \
-    assert (is_imm_in_range(offset, 7, align_bits), "offset is out of range");                             \
-    emit_int32(opc << 30 | 0b101 << 27 | V << 26 | L << 22 | encode_imm(offset, 7, align_bits, 15) |       \
-        rt2->encoding() << 10 | rn->encoding_with_sp() << 5 | rt->encoding());                             \
-  }
-
-  F(stnp_s,  0b00, 1, 0)
-  F(stnp_d,  0b01, 1, 0)
-  F(stnp_q,  0b10, 1, 0)
-  F(ldnp_s,  0b00, 1, 1)
-  F(ldnp_d,  0b01, 1, 1)
-  F(ldnp_q,  0b10, 1, 1)
-#undef F
-
-#define F(mnemonic, size, V, opc) \
-  void mnemonic(Register rt, Address addr) { \
-    assert((addr.mode() == basic_offset) || (rt != addr.base()), "should be different");                    \
-    if (addr.index() == noreg) {                                                                            \
-      if ((addr.mode() == basic_offset) && is_unsigned_imm_in_range(addr.disp(), 12, size)) {               \
-        emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 |                            \
-           encode_unsigned_imm(addr.disp(), 12, size, 10) |                                                 \
-           addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr());                                  \
-      } else {                                                                                              \
-        assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range");                               \
-        emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) |     \
-           addr.mode() << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr());              \
-      }                                                                                                     \
-    } else {                                                                                                \
-      assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode");                      \
-      assert ((addr.shift_imm() == 0) || (addr.shift_imm() == size), "invalid shift amount");               \
-      emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 |                                 \
-         addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 |     \
-         0b10 << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr());                       \
-    }                                                                                                       \
-  }
-
-  F(strb,    0b00, 0, 0b00)
-  F(ldrb,    0b00, 0, 0b01)
-  F(ldrsb,   0b00, 0, 0b10)
-  F(ldrsb_w, 0b00, 0, 0b11)
-
-  F(strh,    0b01, 0, 0b00)
-  F(ldrh,    0b01, 0, 0b01)
-  F(ldrsh,   0b01, 0, 0b10)
-  F(ldrsh_w, 0b01, 0, 0b11)
-
-  F(str_w,   0b10, 0, 0b00)
-  F(ldr_w,   0b10, 0, 0b01)
-  F(ldrsw,   0b10, 0, 0b10)
-
-  F(str,     0b11, 0, 0b00)
-  F(ldr,     0b11, 0, 0b01)
-#undef F
-
-#define F(mnemonic, size, V, opc) \
-  void mnemonic(AsmPrefetchOp prfop, Address addr) { \
-    assert (addr.mode() == basic_offset, #mnemonic " supports only basic_offset address mode");             \
-    if (addr.index() == noreg) {                                                                            \
-      if (is_unsigned_imm_in_range(addr.disp(), 12, size)) {                                                \
-        emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 |                            \
-           encode_unsigned_imm(addr.disp(), 12, size, 10) |                                                 \
-           addr.base()->encoding_with_sp() << 5 | prfop);                                                   \
-      } else {                                                                                              \
-        assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range");                               \
-        emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) |     \
-           addr.base()->encoding_with_sp() << 5 | prfop);                                                   \
-      }                                                                                                     \
-    } else {                                                                                                \
-      assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode");                      \
-      assert ((addr.shift_imm() == 0) || (addr.shift_imm() == size), "invalid shift amount");               \
-      emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 |                                 \
-         addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 |     \
-         0b10 << 10 | addr.base()->encoding_with_sp() << 5 | prfop);                                        \
-    }                                                                                                       \
-  }
-
-  F(prfm, 0b11, 0, 0b10)
-#undef F
-
-#define F(mnemonic, size, V, opc) \
-  void mnemonic(FloatRegister rt, Address addr) { \
-    int align_bits = (((opc & 0b10) >> 1) << 2) | size;                                                     \
-    if (addr.index() == noreg) {                                                                            \
-      if ((addr.mode() == basic_offset) && is_unsigned_imm_in_range(addr.disp(), 12, align_bits)) {         \
-        emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 |                            \
-           encode_unsigned_imm(addr.disp(), 12, align_bits, 10) |                                           \
-           addr.base()->encoding_with_sp() << 5 | rt->encoding());                                          \
-      } else {                                                                                              \
-        assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range");                               \
-        emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) |     \
-           addr.mode() << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding());                      \
-      }                                                                                                     \
-    } else {                                                                                                \
-      assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode");                      \
-      assert ((addr.shift_imm() == 0) || (addr.shift_imm() == align_bits), "invalid shift amount");         \
-      emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 |                                 \
-         addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 |     \
-         0b10 << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding());                               \
-    }                                                                                                       \
-  }
-
-  F(str_b, 0b00, 1, 0b00)
-  F(ldr_b, 0b00, 1, 0b01)
-  F(str_h, 0b01, 1, 0b00)
-  F(ldr_h, 0b01, 1, 0b01)
-  F(str_s, 0b10, 1, 0b00)
-  F(ldr_s, 0b10, 1, 0b01)
-  F(str_d, 0b11, 1, 0b00)
-  F(ldr_d, 0b11, 1, 0b01)
-  F(str_q, 0b00, 1, 0b10)
-  F(ldr_q, 0b00, 1, 0b11)
-#undef F
-
-#define F(mnemonic, opc, V, L) \
-  void mnemonic(Register rt, Register rt2, Address addr) {                                                         \
-    assert((addr.mode() == basic_offset) || ((rt != addr.base()) && (rt2 != addr.base())), "should be different"); \
-    assert(!L || (rt != rt2), "should be different");                                                              \
-    assert(addr.index() == noreg, "[reg + reg] address mode is not available for load/store pair");                \
-    int align_bits = 2 + (opc >> 1);                                                                               \
-    int mode_encoding = (addr.mode() == basic_offset) ? 0b10 : addr.mode();                                        \
-    assert(is_imm_in_range(addr.disp(), 7, align_bits), "offset is out of range");                                 \
-    emit_int32(opc << 30 | 0b101 << 27 | V << 26 | mode_encoding << 23 | L << 22 |                                 \
-       encode_imm(addr.disp(), 7, align_bits, 15) | rt2->encoding_with_zr() << 10 |                                \
-       addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr());                                             \
-  }
-
-  F(stp_w, 0b00, 0, 0)
-  F(ldp_w, 0b00, 0, 1)
-  F(ldpsw, 0b01, 0, 1)
-  F(stp,   0b10, 0, 0)
-  F(ldp,   0b10, 0, 1)
-#undef F
-
-#define F(mnemonic, opc, V, L) \
-  void mnemonic(FloatRegister rt, FloatRegister rt2, Address addr) {                                                         \
-    assert(!L || (rt != rt2), "should be different");                                                              \
-    assert(addr.index() == noreg, "[reg + reg] address mode is not available for load/store pair");                \
-    int align_bits = 2 + opc;                                                                                      \
-    int mode_encoding = (addr.mode() == basic_offset) ? 0b10 : addr.mode();                                        \
-    assert(is_imm_in_range(addr.disp(), 7, align_bits), "offset is out of range");                                 \
-    emit_int32(opc << 30 | 0b101 << 27 | V << 26 | mode_encoding << 23 | L << 22 |                                 \
-       encode_imm(addr.disp(), 7, align_bits, 15) | rt2->encoding() << 10 |                                        \
-       addr.base()->encoding_with_sp() << 5 | rt->encoding());                                                     \
-  }
-
-  F(stp_s, 0b00, 1, 0)
-  F(ldp_s, 0b00, 1, 1)
-  F(stp_d, 0b01, 1, 0)
-  F(ldp_d, 0b01, 1, 1)
-  F(stp_q, 0b10, 1, 0)
-  F(ldp_q, 0b10, 1, 1)
-#undef F
-
-  // Data processing instructions
-
-#define F(mnemonic, sf, opc) \
-  void mnemonic(Register rd, Register rn, const LogicalImmediate& imm) {                      \
-    assert (imm.is_encoded(), "illegal immediate for logical instruction");                   \
-    assert (imm.is32bit() == (sf == 0), "immediate size does not match instruction size");    \
-    emit_int32(sf << 31 | opc << 29 | 0b100100 << 23 | imm.immN() << 22 | imm.immr() << 16 |  \
-        imm.imms() << 10 | rn->encoding_with_zr() << 5 |                                      \
-        ((opc == 0b11) ? rd->encoding_with_zr() : rd->encoding_with_sp()));                   \
-  }                                                                                           \
-  void mnemonic(Register rd, Register rn, uintx imm) {                                        \
-    LogicalImmediate limm(imm, (sf == 0));                                                    \
-    mnemonic(rd, rn, limm);                                                                   \
-  }                                                                                           \
-  void mnemonic(Register rd, Register rn, unsigned int imm) {                                 \
-    mnemonic(rd, rn, (uintx)imm);                                                             \
-  }
-
-  F(andr_w, 0, 0b00)
-  F(orr_w,  0, 0b01)
-  F(eor_w,  0, 0b10)
-  F(ands_w, 0, 0b11)
-
-  F(andr, 1, 0b00)
-  F(orr,  1, 0b01)
-  F(eor,  1, 0b10)
-  F(ands, 1, 0b11)
-#undef F
-
-  void tst(Register rn, unsigned int imm) {
-    ands(ZR, rn, imm);
-  }
-
-  void tst_w(Register rn, unsigned int imm) {
-    ands_w(ZR, rn, imm);
-  }
-
-#define F(mnemonic, sf, opc, N) \
-  void mnemonic(Register rd, Register rn, AsmOperand operand) { \
-    assert (operand.shift_imm() >> (5 + sf) == 0, "shift amount is too large");          \
-    emit_int32(sf << 31 | opc << 29 | 0b01010 << 24 | operand.shift() << 22 | N << 21 |  \
-        operand.reg()->encoding_with_zr() << 16 | operand.shift_imm() << 10 |            \
-        rn->encoding_with_zr() << 5 | rd->encoding_with_zr());                           \
-  }
-
-  F(andr_w, 0, 0b00, 0)
-  F(bic_w,  0, 0b00, 1)
-  F(orr_w,  0, 0b01, 0)
-  F(orn_w,  0, 0b01, 1)
-  F(eor_w,  0, 0b10, 0)
-  F(eon_w,  0, 0b10, 1)
-  F(ands_w, 0, 0b11, 0)
-  F(bics_w, 0, 0b11, 1)
-
-  F(andr, 1, 0b00, 0)
-  F(bic,  1, 0b00, 1)
-  F(orr,  1, 0b01, 0)
-  F(orn,  1, 0b01, 1)
-  F(eor,  1, 0b10, 0)
-  F(eon,  1, 0b10, 1)
-  F(ands, 1, 0b11, 0)
-  F(bics, 1, 0b11, 1)
-#undef F
-
-  void tst(Register rn, AsmOperand operand) {
-    ands(ZR, rn, operand);
-  }
-
-  void tst_w(Register rn, AsmOperand operand) {
-    ands_w(ZR, rn, operand);
-  }
-
-  void mvn(Register rd, AsmOperand operand) {
-    orn(rd, ZR, operand);
-  }
-
-  void mvn_w(Register rd, AsmOperand operand) {
-    orn_w(rd, ZR, operand);
-  }
-
-#define F(mnemonic, sf, op, S) \
-  void mnemonic(Register rd, Register rn, const ArithmeticImmediate& imm) {                       \
-    assert(imm.is_encoded(), "immediate is out of range");                                        \
-    emit_int32(sf << 31 | op << 30 | S << 29 | 0b10001 << 24 | imm.shift() << 22 |                \
-        imm.imm() << 10 | rn->encoding_with_sp() << 5 |                                           \
-        (S == 1 ? rd->encoding_with_zr() : rd->encoding_with_sp()));                              \
-  }                                                                                               \
-  void mnemonic(Register rd, Register rn, int imm) {                                              \
-    mnemonic(rd, rn, ArithmeticImmediate(imm));                                                   \
-  }                                                                                               \
-  void mnemonic(Register rd, Register rn, int imm, AsmShift12 shift) {                            \
-    mnemonic(rd, rn, ArithmeticImmediate(imm, shift));                                            \
-  }                                                                                               \
-  void mnemonic(Register rd, Register rn, Register rm, AsmExtendOp extend, int shift_imm = 0) {   \
-    assert ((0 <= shift_imm) && (shift_imm <= 4), "shift amount is out of range");                \
-    emit_int32(sf << 31 | op << 30 | S << 29 | 0b01011001 << 21 | rm->encoding_with_zr() << 16 |  \
-        extend << 13 | shift_imm << 10 | rn->encoding_with_sp() << 5 |                            \
-        (S == 1 ? rd->encoding_with_zr() : rd->encoding_with_sp()));                              \
-  }                                                                                               \
-  void mnemonic(Register rd, Register rn, AsmOperand operand) {                                   \
-    assert (operand.shift() != ror, "illegal shift type");                                        \
-    assert (operand.shift_imm() >> (5 + sf) == 0, "shift amount is too large");                   \
-    emit_int32(sf << 31 | op << 30 | S << 29 | 0b01011 << 24 | operand.shift() << 22 |            \
-        operand.reg()->encoding_with_zr() << 16 | operand.shift_imm() << 10 |                     \
-        rn->encoding_with_zr() << 5 | rd->encoding_with_zr());                                    \
-  }
-
-  F(add_w,  0, 0, 0)
-  F(adds_w, 0, 0, 1)
-  F(sub_w,  0, 1, 0)
-  F(subs_w, 0, 1, 1)
-
-  F(add,    1, 0, 0)
-  F(adds,   1, 0, 1)
-  F(sub,    1, 1, 0)
-  F(subs,   1, 1, 1)
-#undef F
-
-  void mov(Register rd, Register rm) {
-    if ((rd == SP) || (rm == SP)) {
-      add(rd, rm, 0);
-    } else {
-      orr(rd, ZR, rm);
-    }
-  }
-
-  void mov_w(Register rd, Register rm) {
-    if ((rd == SP) || (rm == SP)) {
-      add_w(rd, rm, 0);
-    } else {
-      orr_w(rd, ZR, rm);
-    }
-  }
-
-  void cmp(Register rn, int imm) {
-    subs(ZR, rn, imm);
-  }
-
-  void cmp_w(Register rn, int imm) {
-    subs_w(ZR, rn, imm);
-  }
-
-  void cmp(Register rn, Register rm) {
-    assert (rm != SP, "SP should not be used as the 2nd operand of cmp");
-    if (rn == SP) {
-      subs(ZR, rn, rm, ex_uxtx);
-    } else {
-      subs(ZR, rn, rm);
-    }
-  }
-
-  void cmp_w(Register rn, Register rm) {
-    assert ((rn != SP) && (rm != SP), "SP should not be used in 32-bit cmp");
-    subs_w(ZR, rn, rm);
-  }
-
-  void cmp(Register rn, AsmOperand operand) {
-    assert (rn != SP, "SP is not allowed in cmp with shifted register (AsmOperand)");
-    subs(ZR, rn, operand);
-  }
-
-  void cmn(Register rn, int imm) {
-    adds(ZR, rn, imm);
-  }
-
-  void cmn_w(Register rn, int imm) {
-    adds_w(ZR, rn, imm);
-  }
-
-  void cmn(Register rn, Register rm) {
-    assert (rm != SP, "SP should not be used as the 2nd operand of cmp");
-    if (rn == SP) {
-      adds(ZR, rn, rm, ex_uxtx);
-    } else {
-      adds(ZR, rn, rm);
-    }
-  }
-
-  void cmn_w(Register rn, Register rm) {
-    assert ((rn != SP) && (rm != SP), "SP should not be used in 32-bit cmp");
-    adds_w(ZR, rn, rm);
-  }
-
-  void neg(Register rd, Register rm) {
-    sub(rd, ZR, rm);
-  }
-
-  void neg_w(Register rd, Register rm) {
-    sub_w(rd, ZR, rm);
-  }
-
-#define F(mnemonic, sf, op, S) \
-  void mnemonic(Register rd, Register rn, Register rm) { \
-    emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010000 << 21 | rm->encoding_with_zr() << 16 |   \
-        rn->encoding_with_zr() << 5 | rd->encoding_with_zr());                                     \
-  }
-
-  F(adc_w,  0, 0, 0)
-  F(adcs_w, 0, 0, 1)
-  F(sbc_w,  0, 1, 0)
-  F(sbcs_w, 0, 1, 1)
-
-  F(adc,    1, 0, 0)
-  F(adcs,   1, 0, 1)
-  F(sbc,    1, 1, 0)
-  F(sbcs,   1, 1, 1)
-#undef F
-
-#define F(mnemonic, sf, N) \
-  void mnemonic(Register rd, Register rn, Register rm, int lsb) { \
-    assert ((lsb >> (5 + sf)) == 0, "illegal least significant bit position");        \
-    emit_int32(sf << 31 | 0b100111 << 23 | N << 22 | rm->encoding_with_zr() << 16 |   \
-        lsb << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr());            \
-  }
-
-  F(extr_w,  0, 0)
-  F(extr,    1, 1)
-#undef F
-
-#define F(mnemonic, sf, opc) \
-  void mnemonic(Register rd, int imm, int shift) { \
-    assert ((imm >> 16) == 0, "immediate is out of range");                       \
-    assert (((shift & 0xf) == 0) && ((shift >> (5 + sf)) == 0), "invalid shift"); \
-    emit_int32(sf << 31 | opc << 29 | 0b100101 << 23 | (shift >> 4) << 21 |       \
-        imm << 5 | rd->encoding_with_zr());                                       \
-  }
-
-  F(movn_w,  0, 0b00)
-  F(movz_w,  0, 0b10)
-  F(movk_w,  0, 0b11)
-  F(movn,    1, 0b00)
-  F(movz,    1, 0b10)
-  F(movk,    1, 0b11)
-#undef F
-
-  void mov(Register rd, int imm) {
-    assert ((imm >> 16) == 0, "immediate is out of range");
-    movz(rd, imm, 0);
-  }
-
-  void mov_w(Register rd, int imm) {
-    assert ((imm >> 16) == 0, "immediate is out of range");
-    movz_w(rd, imm, 0);
-  }
-
-#define F(mnemonic, sf, op, S) \
-  void mnemonic(Register rn, int imm, int nzcv, AsmCondition cond) { \
-    assert ((imm >> 5) == 0, "immediate is out of range");                      \
-    assert ((nzcv >> 4) == 0, "illegal nzcv");                                  \
-    emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010010 << 21 | imm << 16 |   \
-         cond << 12 | 1 << 11 | rn->encoding_with_zr() << 5 | nzcv);            \
-  }
-
-  F(ccmn_w, 0, 0, 1)
-  F(ccmp_w, 0, 1, 1)
-  F(ccmn,   1, 0, 1)
-  F(ccmp,   1, 1, 1)
-#undef F
-
-#define F(mnemonic, sf, op, S) \
-  void mnemonic(Register rn, Register rm, int nzcv, AsmCondition cond) { \
-    assert ((nzcv >> 4) == 0, "illegal nzcv");                                                    \
-    emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010010 << 21 | rm->encoding_with_zr() << 16 |  \
-        cond << 12 | rn->encoding_with_zr() << 5 | nzcv);                                         \
-  }
-
-  F(ccmn_w, 0, 0, 1)
-  F(ccmp_w, 0, 1, 1)
-  F(ccmn,   1, 0, 1)
-  F(ccmp,   1, 1, 1)
-#undef F
-
-#define F(mnemonic, sf, op, S, op2) \
-  void mnemonic(Register rd, Register rn, Register rm, AsmCondition cond) { \
-    emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010100 << 21 | rm->encoding_with_zr() << 16 |  \
-        cond << 12 | op2 << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr());           \
-  }
-
-  F(csel_w,  0, 0, 0, 0b00)
-  F(csinc_w, 0, 0, 0, 0b01)
-  F(csinv_w, 0, 1, 0, 0b00)
-  F(csneg_w, 0, 1, 0, 0b01)
-
-  F(csel,    1, 0, 0, 0b00)
-  F(csinc,   1, 0, 0, 0b01)
-  F(csinv,   1, 1, 0, 0b00)
-  F(csneg,   1, 1, 0, 0b01)
-#undef F
-
-  void cset(Register rd, AsmCondition cond) {
-    csinc(rd, ZR, ZR, inverse(cond));
-  }
-
-  void cset_w(Register rd, AsmCondition cond) {
-    csinc_w(rd, ZR, ZR, inverse(cond));
-  }
-
-  void csetm(Register rd, AsmCondition cond) {
-    csinv(rd, ZR, ZR, inverse(cond));
-  }
-
-  void csetm_w(Register rd, AsmCondition cond) {
-    csinv_w(rd, ZR, ZR, inverse(cond));
-  }
-
-  void cinc(Register rd, Register rn, AsmCondition cond) {
-    csinc(rd, rn, rn, inverse(cond));
-  }
-
-  void cinc_w(Register rd, Register rn, AsmCondition cond) {
-    csinc_w(rd, rn, rn, inverse(cond));
-  }
-
-  void cinv(Register rd, Register rn, AsmCondition cond) {
-    csinv(rd, rn, rn, inverse(cond));
-  }
-
-  void cinv_w(Register rd, Register rn, AsmCondition cond) {
-    csinv_w(rd, rn, rn, inverse(cond));
-  }
-
-#define F(mnemonic, sf, S, opcode) \
-  void mnemonic(Register rd, Register rn) { \
-    emit_int32(sf << 31 | 1 << 30 | S << 29 | 0b11010110 << 21 | opcode << 10 |  \
-        rn->encoding_with_zr() << 5 | rd->encoding_with_zr());                   \
-  }
-
-  F(rbit_w,  0, 0, 0b000000)
-  F(rev16_w, 0, 0, 0b000001)
-  F(rev_w,   0, 0, 0b000010)
-  F(clz_w,   0, 0, 0b000100)
-  F(cls_w,   0, 0, 0b000101)
-
-  F(rbit,    1, 0, 0b000000)
-  F(rev16,   1, 0, 0b000001)
-  F(rev32,   1, 0, 0b000010)
-  F(rev,     1, 0, 0b000011)
-  F(clz,     1, 0, 0b000100)
-  F(cls,     1, 0, 0b000101)
-#undef F
-
-#define F(mnemonic, sf, S, opcode) \
-  void mnemonic(Register rd, Register rn, Register rm) { \
-    emit_int32(sf << 31 | S << 29 | 0b11010110 << 21 | rm->encoding_with_zr() << 16 |  \
-        opcode << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr());          \
-  }
-
-  F(udiv_w,  0, 0, 0b000010)
-  F(sdiv_w,  0, 0, 0b000011)
-  F(lslv_w,  0, 0, 0b001000)
-  F(lsrv_w,  0, 0, 0b001001)
-  F(asrv_w,  0, 0, 0b001010)
-  F(rorv_w,  0, 0, 0b001011)
-
-  F(udiv,    1, 0, 0b000010)
-  F(sdiv,    1, 0, 0b000011)
-  F(lslv,    1, 0, 0b001000)
-  F(lsrv,    1, 0, 0b001001)
-  F(asrv,    1, 0, 0b001010)
-  F(rorv,    1, 0, 0b001011)
-#undef F
-
-#define F(mnemonic, sf, op31, o0) \
-  void mnemonic(Register rd, Register rn, Register rm, Register ra) { \
-    emit_int32(sf << 31 | 0b11011 << 24 | op31 << 21 | rm->encoding_with_zr() << 16 |                     \
-        o0 << 15 | ra->encoding_with_zr() << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr());  \
-  }
-
-  F(madd_w,  0, 0b000, 0)
-  F(msub_w,  0, 0b000, 1)
-  F(madd,    1, 0b000, 0)
-  F(msub,    1, 0b000, 1)
-
-  F(smaddl,  1, 0b001, 0)
-  F(smsubl,  1, 0b001, 1)
-  F(umaddl,  1, 0b101, 0)
-  F(umsubl,  1, 0b101, 1)
-#undef F
-
-  void mul(Register rd, Register rn, Register rm) {
-      madd(rd, rn, rm, ZR);
-  }
-
-  void mul_w(Register rd, Register rn, Register rm) {
-      madd_w(rd, rn, rm, ZR);
-  }
-
-#define F(mnemonic, sf, op31, o0) \
-  void mnemonic(Register rd, Register rn, Register rm) { \
-    emit_int32(sf << 31 | 0b11011 << 24 | op31 << 21 | rm->encoding_with_zr() << 16 |      \
-        o0 << 15 | 0b11111 << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr());  \
-  }
-
-  F(smulh,   1, 0b010, 0)
-  F(umulh,   1, 0b110, 0)
-#undef F
-
-#define F(mnemonic, op) \
-  void mnemonic(Register rd, address addr) { \
-    intx offset;                                                        \
-    if (op == 0) {                                                      \
-      offset = addr - pc();                                             \
-    } else {                                                            \
-      offset = (((intx)addr) - (((intx)pc()) & ~0xfff)) >> 12;          \
-    }                                                                   \
-    assert (is_imm_in_range(offset, 21, 0), "offset is out of range");  \
-    emit_int32(op << 31 | (offset & 3) << 29 | 0b10000 << 24 |          \
-        encode_imm(offset >> 2, 19, 0, 5) | rd->encoding_with_zr());    \
-  }                                                                     \
-
-  F(adr,   0)
-  F(adrp,  1)
-#undef F
-
-  void adr(Register rd, Label& L) {
-    adr(rd, target(L));
-  }
-
-#define F(mnemonic, sf, opc, N)                                                \
-  void mnemonic(Register rd, Register rn, int immr, int imms) {                \
-    assert ((immr >> (5 + sf)) == 0, "immr is out of range");                  \
-    assert ((imms >> (5 + sf)) == 0, "imms is out of range");                  \
-    emit_int32(sf << 31 | opc << 29 | 0b100110 << 23 | N << 22 | immr << 16 |  \
-        imms << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr());    \
-  }
-
-  F(sbfm_w, 0, 0b00, 0)
-  F(bfm_w,  0, 0b01, 0)
-  F(ubfm_w, 0, 0b10, 0)
-
-  F(sbfm, 1, 0b00, 1)
-  F(bfm,  1, 0b01, 1)
-  F(ubfm, 1, 0b10, 1)
-#undef F
-
-#define F(alias, mnemonic, sf, immr, imms) \
-  void alias(Register rd, Register rn, int lsb, int width) {                        \
-    assert ((lsb >> (5 + sf)) == 0, "lsb is out of range");                         \
-    assert ((1 <= width) && (width <= (32 << sf) - lsb), "width is out of range");  \
-    mnemonic(rd, rn, immr, imms);                                                   \
-  }
-
-  F(bfi_w,   bfm_w,  0, (-lsb) & 0x1f, width - 1)
-  F(bfi,     bfm,    1, (-lsb) & 0x3f, width - 1)
-  F(bfxil_w, bfm_w,  0, lsb,           lsb + width - 1)
-  F(bfxil,   bfm,    1, lsb,           lsb + width - 1)
-  F(sbfiz_w, sbfm_w, 0, (-lsb) & 0x1f, width - 1)
-  F(sbfiz,   sbfm,   1, (-lsb) & 0x3f, width - 1)
-  F(sbfx_w,  sbfm_w, 0, lsb,           lsb + width - 1)
-  F(sbfx,    sbfm,   1, lsb,           lsb + width - 1)
-  F(ubfiz_w, ubfm_w, 0, (-lsb) & 0x1f, width - 1)
-  F(ubfiz,   ubfm,   1, (-lsb) & 0x3f, width - 1)
-  F(ubfx_w,  ubfm_w, 0, lsb,           lsb + width - 1)
-  F(ubfx,    ubfm,   1, lsb,           lsb + width - 1)
-#undef F
-
-#define F(alias, mnemonic, sf, immr, imms) \
-  void alias(Register rd, Register rn, int shift) {              \
-    assert ((shift >> (5 + sf)) == 0, "shift is out of range");  \
-    mnemonic(rd, rn, immr, imms);                                \
-  }
-
-  F(_asr_w, sbfm_w, 0, shift, 31)
-  F(_asr,   sbfm,   1, shift, 63)
-  F(_lsl_w, ubfm_w, 0, (-shift) & 0x1f, 31 - shift)
-  F(_lsl,   ubfm,   1, (-shift) & 0x3f, 63 - shift)
-  F(_lsr_w, ubfm_w, 0, shift, 31)
-  F(_lsr,   ubfm,   1, shift, 63)
-#undef F
-
-#define F(alias, mnemonic, immr, imms) \
-  void alias(Register rd, Register rn) {   \
-    mnemonic(rd, rn, immr, imms);          \
-  }
-
-  F(sxtb_w, sbfm_w, 0, 7)
-  F(sxtb,   sbfm,   0, 7)
-  F(sxth_w, sbfm_w, 0, 15)
-  F(sxth,   sbfm,   0, 15)
-  F(sxtw,   sbfm,   0, 31)
-  F(uxtb_w, ubfm_w, 0, 7)
-  F(uxtb,   ubfm,   0, 7)
-  F(uxth_w, ubfm_w, 0, 15)
-  F(uxth,   ubfm,   0, 15)
-#undef F
-
-  // Branch instructions
-
-#define F(mnemonic, op) \
-  void mnemonic(Register rn) {                                                             \
-    emit_int32(0b1101011 << 25 | op << 21 | 0b11111 << 16 | rn->encoding_with_zr() << 5);  \
-  }
-
-  F(br,  0b00)
-  F(blr, 0b01)
-  F(ret, 0b10)
-#undef F
-
-  void ret() {
-    ret(LR);
-  }
-
-#define F(mnemonic, op) \
-  void mnemonic(address target) {                                         \
-    intx offset = target - pc();                                          \
-    assert (is_offset_in_range(offset, 26), "offset is out of range");    \
-    emit_int32(op << 31 | 0b00101 << 26 | encode_offset(offset, 26, 0));  \
-  }
-
-  F(b,  0)
-  F(bl, 1)
-#undef F
-
-  void b(address target, AsmCondition cond) {
-    if (cond == al) {
-      b(target);
-    } else {
-      intx offset = target - pc();
-      assert (is_offset_in_range(offset, 19), "offset is out of range");
-      emit_int32(0b0101010 << 25 | encode_offset(offset, 19, 5) | cond);
-    }
-  }
-
-
-#define F(mnemonic, sf, op)                                             \
-  void mnemonic(Register rt, address target) {                          \
-    intx offset = target - pc();                                        \
-    assert (is_offset_in_range(offset, 19), "offset is out of range");  \
-    emit_int32(sf << 31 | 0b011010 << 25 | op << 24 | encode_offset(offset, 19, 5) | rt->encoding_with_zr()); \
-  }                                                                     \
-
-  F(cbz_w,  0, 0)
-  F(cbnz_w, 0, 1)
-  F(cbz,    1, 0)
-  F(cbnz,   1, 1)
-#undef F
-
-#define F(mnemonic, op)                                                 \
-  void mnemonic(Register rt, int bit, address target) {                 \
-    intx offset = target - pc();                                        \
-    assert (is_offset_in_range(offset, 14), "offset is out of range");  \
-    assert (0 <= bit && bit < 64, "bit number is out of range");        \
-    emit_int32((bit >> 5) << 31 | 0b011011 << 25 | op << 24 | (bit & 0x1f) << 19 | \
-        encode_offset(offset, 14, 5) | rt->encoding_with_zr());         \
-  }                                                                     \
-
-  F(tbz,  0)
-  F(tbnz, 1)
-#undef F
-
-  // System instructions
-
-  enum DMB_Opt {
-    DMB_ld  = 0b1101,
-    DMB_st  = 0b1110,
-    DMB_all = 0b1111
-  };
-
-#define F(mnemonic, L, op0, op1, CRn, op2, Rt) \
-  void mnemonic(DMB_Opt option) {                                       \
-    emit_int32(0b1101010100 << 22 | L << 21 | op0 << 19 | op1 << 16 |   \
-        CRn << 12 | option << 8 | op2 << 5 | Rt);                       \
-  }
-
-  F(dsb,  0, 0b00, 0b011, 0b0011, 0b100, 0b11111)
-  F(dmb,  0, 0b00, 0b011, 0b0011, 0b101, 0b11111)
-#undef F
-
-#define F(mnemonic, L, op0, op1, CRn, Rt) \
-  void mnemonic(int imm) {                                              \
-    assert ((imm >> 7) == 0, "immediate is out of range");              \
-    emit_int32(0b1101010100 << 22 | L << 21 | op0 << 19 | op1 << 16 |   \
-        CRn << 12 | imm << 5 | Rt);                                     \
-  }
-
-  F(hint, 0, 0b00, 0b011, 0b0010, 0b11111)
-#undef F
-
-  void nop() {
-    hint(0);
-  }
-
-  void yield() {
-    hint(1);
-  }
-
-#define F(mnemonic, opc, op2, LL) \
-  void mnemonic(int imm = 0) {                                           \
-    assert ((imm >> 16) == 0, "immediate is out of range");              \
-    emit_int32(0b11010100 << 24 | opc << 21 | imm << 5 | op2 << 2 | LL); \
-  }
-
-  F(brk, 0b001, 0b000, 0b00)
-  F(hlt, 0b010, 0b000, 0b00)
-  F(dpcs1, 0b101, 0b000, 0b01)
-#undef F
-
-  enum SystemRegister { // o0<1> op1<3> CRn<4> CRm<4> op2<3>
-    SysReg_NZCV = 0b101101000010000,
-    SysReg_FPCR = 0b101101000100000,
-  };
-
-  void mrs(Register rt, SystemRegister systemReg) {
-    assert ((systemReg >> 15) == 0, "systemReg is out of range");
-    emit_int32(0b110101010011 << 20 | systemReg << 5 | rt->encoding_with_zr());
-  }
-
-  void msr(SystemRegister systemReg, Register rt) {
-    assert ((systemReg >> 15) == 0, "systemReg is out of range");
-    emit_int32(0b110101010001 << 20 | systemReg << 5 | rt->encoding_with_zr());
-  }
-
-  // Floating-point instructions
-
-#define F(mnemonic, M, S, type, opcode2) \
-  void mnemonic(FloatRegister rn, FloatRegister rm) {                         \
-    emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 |     \
-        rm->encoding() << 16 | 0b1000 << 10 | rn->encoding() << 5 | opcode2); \
-  }
-
-  F(fcmp_s,   0, 0, 0b00, 0b00000)
-  F(fcmpe_s,  0, 0, 0b00, 0b01000)
-  F(fcmp_d,   0, 0, 0b01, 0b00000)
-  F(fcmpe_d,  0, 0, 0b01, 0b10000)
-#undef F
-
-#define F(mnemonic, M, S, type, opcode2) \
-  void mnemonic(FloatRegister rn) {                                           \
-    emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 |     \
-        0b1000 << 10 | rn->encoding() << 5 | opcode2);                        \
-  }
-
-  F(fcmp0_s,   0, 0, 0b00, 0b01000)
-  F(fcmpe0_s,  0, 0, 0b00, 0b11000)
-  F(fcmp0_d,   0, 0, 0b01, 0b01000)
-  F(fcmpe0_d,  0, 0, 0b01, 0b11000)
-#undef F
-
-#define F(mnemonic, M, S, type, op) \
-  void mnemonic(FloatRegister rn, FloatRegister rm, int nzcv, AsmCondition cond) { \
-    assert ((nzcv >> 4) == 0, "illegal nzcv");                                                  \
-    emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 |                       \
-        rm->encoding() << 16 | cond << 12 | 0b01 << 10 | rn->encoding() << 5 | op << 4 | nzcv); \
-  }
-
-  F(fccmp_s,   0, 0, 0b00, 0)
-  F(fccmpe_s,  0, 0, 0b00, 1)
-  F(fccmp_d,   0, 0, 0b01, 0)
-  F(fccmpe_d,  0, 0, 0b01, 1)
-#undef F
-
-#define F(mnemonic, M, S, type) \
-  void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm, AsmCondition cond) { \
-    emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 |                       \
-        rm->encoding() << 16 | cond << 12 | 0b11 << 10 | rn->encoding() << 5 | rd->encoding()); \
-  }
-
-  F(fcsel_s,   0, 0, 0b00)
-  F(fcsel_d,   0, 0, 0b01)
-#undef F
-
-#define F(mnemonic, M, S, type, opcode) \
-  void mnemonic(FloatRegister rd, FloatRegister rn) { \
-    emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 |      \
-        opcode << 15 | 0b10000 << 10 | rn->encoding() << 5 | rd->encoding());  \
-  }
-
-  F(fmov_s,   0, 0, 0b00, 0b000000)
-  F(fabs_s,   0, 0, 0b00, 0b000001)
-  F(fneg_s,   0, 0, 0b00, 0b000010)
-  F(fsqrt_s,  0, 0, 0b00, 0b000011)
-  F(fcvt_ds,  0, 0, 0b00, 0b000101)
-  F(fcvt_hs,  0, 0, 0b00, 0b000111)
-  F(frintn_s, 0, 0, 0b00, 0b001000)
-  F(frintp_s, 0, 0, 0b00, 0b001001)
-  F(frintm_s, 0, 0, 0b00, 0b001010)
-  F(frintz_s, 0, 0, 0b00, 0b001011)
-  F(frinta_s, 0, 0, 0b00, 0b001100)
-  F(frintx_s, 0, 0, 0b00, 0b001110)
-  F(frinti_s, 0, 0, 0b00, 0b001111)
-
-  F(fmov_d,   0, 0, 0b01, 0b000000)
-  F(fabs_d,   0, 0, 0b01, 0b000001)
-  F(fneg_d,   0, 0, 0b01, 0b000010)
-  F(fsqrt_d,  0, 0, 0b01, 0b000011)
-  F(fcvt_sd,  0, 0, 0b01, 0b000100)
-  F(fcvt_hd,  0, 0, 0b01, 0b000111)
-  F(frintn_d, 0, 0, 0b01, 0b001000)
-  F(frintp_d, 0, 0, 0b01, 0b001001)
-  F(frintm_d, 0, 0, 0b01, 0b001010)
-  F(frintz_d, 0, 0, 0b01, 0b001011)
-  F(frinta_d, 0, 0, 0b01, 0b001100)
-  F(frintx_d, 0, 0, 0b01, 0b001110)
-  F(frinti_d, 0, 0, 0b01, 0b001111)
-
-  F(fcvt_sh,  0, 0, 0b11, 0b000100)
-  F(fcvt_dh,  0, 0, 0b11, 0b000101)
-#undef F
-
-#define F(mnemonic, M, S, type, opcode) \
-  void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm) { \
-    emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 |                          \
-        rm->encoding() << 16 | opcode << 12 | 0b10 << 10 | rn->encoding() << 5 | rd->encoding());  \
-  }
-
-  F(fmul_s,   0, 0, 0b00, 0b0000)
-  F(fdiv_s,   0, 0, 0b00, 0b0001)
-  F(fadd_s,   0, 0, 0b00, 0b0010)
-  F(fsub_s,   0, 0, 0b00, 0b0011)
-  F(fmax_s,   0, 0, 0b00, 0b0100)
-  F(fmin_s,   0, 0, 0b00, 0b0101)
-  F(fmaxnm_s, 0, 0, 0b00, 0b0110)
-  F(fminnm_s, 0, 0, 0b00, 0b0111)
-  F(fnmul_s,  0, 0, 0b00, 0b1000)
-
-  F(fmul_d,   0, 0, 0b01, 0b0000)
-  F(fdiv_d,   0, 0, 0b01, 0b0001)
-  F(fadd_d,   0, 0, 0b01, 0b0010)
-  F(fsub_d,   0, 0, 0b01, 0b0011)
-  F(fmax_d,   0, 0, 0b01, 0b0100)
-  F(fmin_d,   0, 0, 0b01, 0b0101)
-  F(fmaxnm_d, 0, 0, 0b01, 0b0110)
-  F(fminnm_d, 0, 0, 0b01, 0b0111)
-  F(fnmul_d,  0, 0, 0b01, 0b1000)
-#undef F
-
-#define F(mnemonic, M, S, type, o1, o0) \
-  void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm, FloatRegister ra) { \
-    emit_int32(M << 31 | S << 29 | 0b11111 << 24 | type << 22 | o1 << 21 | rm->encoding() << 16 |  \
-         o0 << 15 | ra->encoding() << 10 | rn->encoding() << 5 | rd->encoding());                  \
-  }
-
-  F(fmadd_s,  0, 0, 0b00, 0, 0)
-  F(fmsub_s,  0, 0, 0b00, 0, 1)
-  F(fnmadd_s, 0, 0, 0b00, 1, 0)
-  F(fnmsub_s, 0, 0, 0b00, 1, 1)
-
-  F(fmadd_d,  0, 0, 0b01, 0, 0)
-  F(fmsub_d,  0, 0, 0b01, 0, 1)
-  F(fnmadd_d, 0, 0, 0b01, 1, 0)
-  F(fnmsub_d, 0, 0, 0b01, 1, 1)
-#undef F
-
-#define F(mnemonic, M, S, type) \
-  void mnemonic(FloatRegister rd, int imm8) { \
-    assert ((imm8 >> 8) == 0, "immediate is out of range");                \
-    emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 |  \
-         imm8 << 13 | 0b100 << 10 | rd->encoding());                       \
-  }
-
-  F(fmov_s, 0, 0, 0b00)
-  F(fmov_d, 0, 0, 0b01)
-#undef F
-
-#define F(mnemonic, sf, S, type, rmode, opcode) \
-  void mnemonic(Register rd, FloatRegister rn) {                                     \
-    emit_int32(sf << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 |           \
-         rmode << 19 | opcode << 16 | rn->encoding() << 5 | rd->encoding_with_zr()); \
-  }
-
-  F(fcvtns_ws, 0, 0, 0b00, 0b00, 0b000)
-  F(fcvtnu_ws, 0, 0, 0b00, 0b00, 0b001)
-  F(fcvtas_ws, 0, 0, 0b00, 0b00, 0b100)
-  F(fcvtau_ws, 0, 0, 0b00, 0b00, 0b101)
-  F(fmov_ws,   0, 0, 0b00, 0b00, 0b110)
-  F(fcvtps_ws, 0, 0, 0b00, 0b01, 0b000)
-  F(fcvtpu_ws, 0, 0, 0b00, 0b01, 0b001)
-  F(fcvtms_ws, 0, 0, 0b00, 0b10, 0b000)
-  F(fcvtmu_ws, 0, 0, 0b00, 0b10, 0b001)
-  F(fcvtzs_ws, 0, 0, 0b00, 0b11, 0b000)
-  F(fcvtzu_ws, 0, 0, 0b00, 0b11, 0b001)
-
-  F(fcvtns_wd, 0, 0, 0b01, 0b00, 0b000)
-  F(fcvtnu_wd, 0, 0, 0b01, 0b00, 0b001)
-  F(fcvtas_wd, 0, 0, 0b01, 0b00, 0b100)
-  F(fcvtau_wd, 0, 0, 0b01, 0b00, 0b101)
-  F(fcvtps_wd, 0, 0, 0b01, 0b01, 0b000)
-  F(fcvtpu_wd, 0, 0, 0b01, 0b01, 0b001)
-  F(fcvtms_wd, 0, 0, 0b01, 0b10, 0b000)
-  F(fcvtmu_wd, 0, 0, 0b01, 0b10, 0b001)
-  F(fcvtzs_wd, 0, 0, 0b01, 0b11, 0b000)
-  F(fcvtzu_wd, 0, 0, 0b01, 0b11, 0b001)
-
-  F(fcvtns_xs, 1, 0, 0b00, 0b00, 0b000)
-  F(fcvtnu_xs, 1, 0, 0b00, 0b00, 0b001)
-  F(fcvtas_xs, 1, 0, 0b00, 0b00, 0b100)
-  F(fcvtau_xs, 1, 0, 0b00, 0b00, 0b101)
-  F(fcvtps_xs, 1, 0, 0b00, 0b01, 0b000)
-  F(fcvtpu_xs, 1, 0, 0b00, 0b01, 0b001)
-  F(fcvtms_xs, 1, 0, 0b00, 0b10, 0b000)
-  F(fcvtmu_xs, 1, 0, 0b00, 0b10, 0b001)
-  F(fcvtzs_xs, 1, 0, 0b00, 0b11, 0b000)
-  F(fcvtzu_xs, 1, 0, 0b00, 0b11, 0b001)
-
-  F(fcvtns_xd, 1, 0, 0b01, 0b00, 0b000)
-  F(fcvtnu_xd, 1, 0, 0b01, 0b00, 0b001)
-  F(fcvtas_xd, 1, 0, 0b01, 0b00, 0b100)
-  F(fcvtau_xd, 1, 0, 0b01, 0b00, 0b101)
-  F(fmov_xd,   1, 0, 0b01, 0b00, 0b110)
-  F(fcvtps_xd, 1, 0, 0b01, 0b01, 0b000)
-  F(fcvtpu_xd, 1, 0, 0b01, 0b01, 0b001)
-  F(fcvtms_xd, 1, 0, 0b01, 0b10, 0b000)
-  F(fcvtmu_xd, 1, 0, 0b01, 0b10, 0b001)
-  F(fcvtzs_xd, 1, 0, 0b01, 0b11, 0b000)
-  F(fcvtzu_xd, 1, 0, 0b01, 0b11, 0b001)
-
-  F(fmov_xq,   1, 0, 0b10, 0b01, 0b110)
-#undef F
-
-#define F(mnemonic, sf, S, type, rmode, opcode) \
-  void mnemonic(FloatRegister rd, Register rn) {                                     \
-    emit_int32(sf << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 |           \
-         rmode << 19 | opcode << 16 | rn->encoding_with_zr() << 5 | rd->encoding()); \
-  }
-
-  F(scvtf_sw,  0, 0, 0b00, 0b00, 0b010)
-  F(ucvtf_sw,  0, 0, 0b00, 0b00, 0b011)
-  F(fmov_sw,   0, 0, 0b00, 0b00, 0b111)
-  F(scvtf_dw,  0, 0, 0b01, 0b00, 0b010)
-  F(ucvtf_dw,  0, 0, 0b01, 0b00, 0b011)
-
-  F(scvtf_sx,  1, 0, 0b00, 0b00, 0b010)
-  F(ucvtf_sx,  1, 0, 0b00, 0b00, 0b011)
-  F(scvtf_dx,  1, 0, 0b01, 0b00, 0b010)
-  F(ucvtf_dx,  1, 0, 0b01, 0b00, 0b011)
-  F(fmov_dx,   1, 0, 0b01, 0b00, 0b111)
-
-  F(fmov_qx,   1, 0, 0b10, 0b01, 0b111)
-#undef F
-
-#define F(mnemonic, opcode) \
-  void mnemonic(FloatRegister Vd, FloatRegister Vn) {                                     \
-    emit_int32( opcode << 10 | Vn->encoding() << 5 | Vd->encoding());             \
-  }
-
-  F(aese, 0b0100111000101000010010);
-  F(aesd, 0b0100111000101000010110);
-  F(aesmc, 0b0100111000101000011010);
-  F(aesimc, 0b0100111000101000011110);
-#undef F
-
-#ifdef COMPILER2
-  typedef VFP::double_num double_num;
-  typedef VFP::float_num  float_num;
-#endif
-
-  void vcnt(FloatRegister Dd, FloatRegister Dn, int quad = 0, int size = 0) {
-    // emitted at VM startup to detect whether the instruction is available
-    assert(!VM_Version::is_initialized() || VM_Version::has_simd(), "simd instruction");
-    assert(size == 0, "illegal size value");
-    emit_int32(0x0e205800 | quad << 30 | size << 22 | Dn->encoding() << 5 | Dd->encoding());
-  }
-
-#ifdef COMPILER2
-  void addv(FloatRegister Dd, FloatRegister Dm, int quad, int size) {
-    // emitted at VM startup to detect whether the instruction is available
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert((quad & ~1) == 0, "illegal value");
-    assert(size >= 0 && size < 3, "illegal value");
-    assert(((size << 1) | quad) != 4, "illegal values (size 2, quad 0)");
-    emit_int32(0x0e31b800 | quad << 30 | size << 22 | Dm->encoding() << 5 | Dd->encoding());
-  }
-
-  enum VElem_Size {
-    VELEM_SIZE_8  = 0x00,
-    VELEM_SIZE_16 = 0x01,
-    VELEM_SIZE_32 = 0x02,
-    VELEM_SIZE_64 = 0x03
-  };
-
-  enum VLD_Type {
-    VLD1_TYPE_1_REG  = 0b0111,
-    VLD1_TYPE_2_REGS = 0b1010,
-    VLD1_TYPE_3_REGS = 0b0110,
-    VLD1_TYPE_4_REGS = 0b0010
-  };
-
-  enum VFloat_Arith_Size {
-    VFA_SIZE_F32 = 0b0,
-    VFA_SIZE_F64 = 0b1
-  };
-
-#define F(mnemonic, U, S, P) \
-  void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm,    \
-                int size, int quad) {                                    \
-    assert(VM_Version::has_simd(), "simd instruction");                  \
-    assert(!(size == VFA_SIZE_F64 && !quad), "reserved");                \
-    assert((size & 1) == size, "overflow");                              \
-    emit_int32(quad << 30 | U << 29 | 0b01110 << 24 |                    \
-               S << 23 | size << 22 | 1 << 21 | P << 11 | 1 << 10 |      \
-               fm->encoding() << 16 |                                    \
-               fn->encoding() <<  5 |                                    \
-               fd->encoding());                                          \
-  }
-
-  F(vaddF, 0, 0, 0b11010)  // Vd = Vn + Vm (float)
-  F(vsubF, 0, 1, 0b11010)  // Vd = Vn - Vm (float)
-  F(vmulF, 1, 0, 0b11011)  // Vd = Vn - Vm (float)
-  F(vdivF, 1, 0, 0b11111)  // Vd = Vn / Vm (float)
-#undef F
-
-#define F(mnemonic, U) \
-  void mnemonic(FloatRegister fd, FloatRegister fm, FloatRegister fn,    \
-                int size, int quad) {                                    \
-    assert(VM_Version::has_simd(), "simd instruction");                  \
-    assert(!(size == VELEM_SIZE_64 && !quad), "reserved");               \
-    assert((size & 0b11) == size, "overflow");                           \
-    int R = 0; /* rounding */                                            \
-    int S = 0; /* saturating */                                          \
-    emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | size << 22 |       \
-               1 << 21 | R << 12 | S << 11 | 0b10001 << 10 |             \
-               fm->encoding() << 16 |                                    \
-               fn->encoding() <<  5 |                                    \
-               fd->encoding());                                          \
-  }
-
-  F(vshlSI, 0)  // Vd = ashift(Vn,Vm) (int)
-  F(vshlUI, 1)  // Vd = lshift(Vn,Vm) (int)
-#undef F
-
-#define F(mnemonic, U, P, M) \
-  void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm,    \
-                int size, int quad) {                                    \
-    assert(VM_Version::has_simd(), "simd instruction");                  \
-    assert(!(size == VELEM_SIZE_64 && !quad), "reserved");               \
-    assert(!(size == VELEM_SIZE_64 && M), "reserved");                   \
-    assert((size & 0b11) == size, "overflow");                           \
-    emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | size << 22 |       \
-               1 << 21 | P << 11 | 1 << 10 |                             \
-               fm->encoding() << 16 |                                    \
-               fn->encoding() <<  5 |                                    \
-               fd->encoding());                                          \
-  }
-
-  F(vmulI, 0, 0b10011,  true)  // Vd = Vn * Vm (int)
-  F(vaddI, 0, 0b10000, false)  // Vd = Vn + Vm (int)
-  F(vsubI, 1, 0b10000, false)  // Vd = Vn - Vm (int)
-#undef F
-
-#define F(mnemonic, U, O) \
-  void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm,    \
-                int quad) {                                              \
-    assert(VM_Version::has_simd(), "simd instruction");                  \
-    emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | O << 22 |          \
-               1 << 21 | 0b00011 << 11 | 1 << 10 |                       \
-               fm->encoding() << 16 |                                    \
-               fn->encoding() <<  5 |                                    \
-               fd->encoding());                                          \
-  }
-
-  F(vandI, 0, 0b00)  // Vd = Vn & Vm (int)
-  F(vorI,  0, 0b10)  // Vd = Vn | Vm (int)
-  F(vxorI, 1, 0b00)  // Vd = Vn ^ Vm (int)
-#undef F
-
-  void vnegI(FloatRegister fd, FloatRegister fn, int size, int quad) {
-    int U = 1;
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(quad || size != VELEM_SIZE_64, "reserved");
-    emit_int32(quad << 30 | U << 29 | 0b01110 << 24 |
-              size << 22 | 0b100000101110 << 10 |
-              fn->encoding() << 5 |
-              fd->encoding() << 0);
-  }
-
-  void vshli(FloatRegister fd, FloatRegister fn, int esize, int imm, int quad) {
-    assert(VM_Version::has_simd(), "simd instruction");
-
-    if (imm >= esize) {
-      // maximum shift gives all zeroes, direction doesn't matter,
-      // but only available for shift right
-      vshri(fd, fn, esize, esize, true /* unsigned */, quad);
-      return;
-    }
-    assert(imm >= 0 && imm < esize, "out of range");
-
-    int imm7 = esize + imm;
-    int immh = imm7 >> 3;
-    assert(immh != 0, "encoding constraint");
-    assert((uint)immh < 16, "sanity");
-    assert(((immh >> 2) | quad) != 0b10, "reserved");
-    emit_int32(quad << 30 | 0b011110 << 23 | imm7 << 16 |
-               0b010101 << 10 | fn->encoding() << 5 | fd->encoding() << 0);
-  }
-
-  void vshri(FloatRegister fd, FloatRegister fn, int esize, int imm,
-             bool U /* unsigned */, int quad) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(imm > 0, "out of range");
-    if (imm >= esize) {
-      // maximum shift (all zeroes)
-      imm = esize;
-    }
-    int imm7 = 2 * esize - imm ;
-    int immh = imm7 >> 3;
-    assert(immh != 0, "encoding constraint");
-    assert((uint)immh < 16, "sanity");
-    assert(((immh >> 2) | quad) != 0b10, "reserved");
-    emit_int32(quad << 30 | U << 29 | 0b011110 << 23 | imm7 << 16 |
-               0b000001 << 10 | fn->encoding() << 5 | fd->encoding() << 0);
-  }
-  void vshrUI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) {
-    vshri(fd, fm, size, imm, true /* unsigned */, quad);
-  }
-  void vshrSI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) {
-    vshri(fd, fm, size, imm, false /* signed */, quad);
-  }
-
-  void vld1(FloatRegister Vt, Address addr, VElem_Size size, int bits) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(bits == 128, "unsupported");
-    assert(addr.disp() == 0 || addr.disp() == 16, "must be");
-    int type = 0b11; // 2D
-    int quad = 1;
-    int L = 1;
-    int opcode = VLD1_TYPE_1_REG;
-    emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
-               Vt->encoding() << 0 | addr.encoding_simd());
-  }
-
-  void vst1(FloatRegister Vt, Address addr, VElem_Size size, int bits) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(bits == 128, "unsupported");
-    assert(addr.disp() == 0 || addr.disp() == 16, "must be");
-    int type = 0b11; // 2D
-    int quad = 1;
-    int L = 0;
-    int opcode = VLD1_TYPE_1_REG;
-    emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
-               Vt->encoding() << 0 | addr.encoding_simd());
-  }
-
-  void vld1(FloatRegister Vt, FloatRegister Vt2, Address addr, VElem_Size size, int bits) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(bits == 128, "unsupported");
-    assert(Vt->successor() == Vt2, "Registers must be ordered");
-    assert(addr.disp() == 0 || addr.disp() == 32, "must be");
-    int type = 0b11; // 2D
-    int quad = 1;
-    int L = 1;
-    int opcode = VLD1_TYPE_2_REGS;
-    emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
-               Vt->encoding() << 0 | addr.encoding_simd());
-  }
-
-  void vst1(FloatRegister Vt, FloatRegister Vt2, Address addr, VElem_Size size, int bits) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(Vt->successor() == Vt2, "Registers must be ordered");
-    assert(bits == 128, "unsupported");
-    assert(addr.disp() == 0 || addr.disp() == 32, "must be");
-    int type = 0b11; // 2D
-    int quad = 1;
-    int L = 0;
-    int opcode = VLD1_TYPE_2_REGS;
-    emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
-               Vt->encoding() << 0 | addr.encoding_simd());
-  }
-
-  void vld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,
-            Address addr, VElem_Size size, int bits) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(bits == 128, "unsupported");
-    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,
-          "Registers must be ordered");
-    assert(addr.disp() == 0 || addr.disp() == 48, "must be");
-    int type = 0b11; // 2D
-    int quad = 1;
-    int L = 1;
-    int opcode = VLD1_TYPE_3_REGS;
-    emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
-               Vt->encoding() << 0 | addr.encoding_simd());
-  }
-
-  void vst1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,
-            Address addr, VElem_Size size, int bits) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(bits == 128, "unsupported");
-    assert(Vt->successor() == Vt2 &&  Vt2->successor() == Vt3,
-           "Registers must be ordered");
-    assert(addr.disp() == 0 || addr.disp() == 48, "must be");
-    int type = 0b11; // 2D
-    int quad = 1;
-    int L = 0;
-    int opcode = VLD1_TYPE_3_REGS;
-    emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
-               Vt->encoding() << 0 | addr.encoding_simd());
-  }
-
-  void vld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,
-            FloatRegister Vt4, Address addr, VElem_Size size, int bits) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(bits == 128, "unsupported");
-    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&
-           Vt3->successor() == Vt4, "Registers must be ordered");
-    assert(addr.disp() == 0 || addr.disp() == 64, "must be");
-    int type = 0b11; // 2D
-    int quad = 1;
-    int L = 1;
-    int opcode = VLD1_TYPE_4_REGS;
-    emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
-               Vt->encoding() << 0 | addr.encoding_simd());
-  }
-
-  void vst1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,
-            FloatRegister Vt4,  Address addr, VElem_Size size, int bits) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(bits == 128, "unsupported");
-    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&
-           Vt3->successor() == Vt4, "Registers must be ordered");
-    assert(addr.disp() == 0 || addr.disp() == 64, "must be");
-    int type = 0b11; // 2D
-    int quad = 1;
-    int L = 0;
-    int opcode = VLD1_TYPE_4_REGS;
-    emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 |
-               Vt->encoding() << 0 | addr.encoding_simd());
-  }
-
-  void rev32(FloatRegister Vd, FloatRegister Vn, VElem_Size size, int quad) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(size == VELEM_SIZE_8 || size == VELEM_SIZE_16, "must be");
-    emit_int32(quad << 30 | 0b101110 << 24 | size << 22 |
-               0b100000000010 << 10 | Vn->encoding() << 5 | Vd->encoding());
-  }
-
-  void eor(FloatRegister Vd, FloatRegister Vn,  FloatRegister Vm, VElem_Size size, int quad) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(size == VELEM_SIZE_8, "must be");
-    emit_int32(quad << 30 | 0b101110001 << 21 | Vm->encoding() << 16 |
-               0b000111 << 10 | Vn->encoding() << 5 | Vd->encoding());
-  }
-
-  void orr(FloatRegister Vd, FloatRegister Vn,  FloatRegister Vm, VElem_Size size, int quad) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(size == VELEM_SIZE_8, "must be");
-    emit_int32(quad << 30 | 0b001110101 << 21 | Vm->encoding() << 16 |
-               0b000111 << 10 | Vn->encoding() << 5 | Vd->encoding());
-  }
-
-  void vmovI(FloatRegister Dd, int imm8, VElem_Size size, int quad) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(imm8 >= 0 && imm8 < 256, "out of range");
-    int op;
-    int cmode;
-    switch (size) {
-    case VELEM_SIZE_8:
-      op = 0;
-      cmode = 0b1110;
-      break;
-    case VELEM_SIZE_16:
-      op = 0;
-      cmode = 0b1000;
-      break;
-    case VELEM_SIZE_32:
-      op = 0;
-      cmode = 0b0000;
-      break;
-    default:
-      cmode = 0;
-      ShouldNotReachHere();
-    }
-    int abc = imm8 >> 5;
-    int defgh = imm8 & 0b11111;
-    emit_int32(quad << 30 | op << 29 | 0b1111 << 24 |
-               abc << 16 | cmode << 12 | 0b01 << 10 |
-               defgh << 5 | Dd->encoding() << 0);
-  }
-
-  void vdupI(FloatRegister Dd, Register Rn, VElem_Size size, int quad) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    assert(size <= 3, "unallocated encoding");
-    assert(size != 3 || quad == 1, "reserved");
-    int imm5 = 1 << size;
-#ifdef ASSERT
-    switch (size) {
-    case VELEM_SIZE_8:
-      assert(imm5 == 0b00001, "sanity");
-      break;
-    case VELEM_SIZE_16:
-      assert(imm5 == 0b00010, "sanity");
-      break;
-    case VELEM_SIZE_32:
-      assert(imm5 == 0b00100, "sanity");
-      break;
-    case VELEM_SIZE_64:
-      assert(imm5 == 0b01000, "sanity");
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-#endif
-    emit_int32(quad << 30 | 0b111 << 25 | 0b11 << 10 |
-               imm5 << 16 | Rn->encoding() << 5 |
-               Dd->encoding() << 0);
-  }
-
-  void vdup(FloatRegister Vd, FloatRegister Vn, VElem_Size size, int quad) {
-    assert(VM_Version::has_simd(), "simd instruction");
-    int index = 0;
-    int bytes = 1 << size;
-    int range = 16 / bytes;
-    assert(index < range, "overflow");
-
-    assert(size != VELEM_SIZE_64 || quad, "reserved");
-    assert(8 << VELEM_SIZE_8  ==  8, "sanity");
-    assert(8 << VELEM_SIZE_16 == 16, "sanity");
-    assert(8 << VELEM_SIZE_32 == 32, "sanity");
-    assert(8 << VELEM_SIZE_64 == 64, "sanity");
-
-    int imm5 = (index << (size + 1)) | bytes;
-
-    emit_int32(quad << 30 | 0b001110000 << 21 | imm5 << 16 | 0b000001 << 10 |
-               Vn->encoding() << 5 | Vd->encoding() << 0);
-  }
-
-  void vdupF(FloatRegister Vd, FloatRegister Vn, int quad) {
-    vdup(Vd, Vn, VELEM_SIZE_32, quad);
-  }
-
-  void vdupD(FloatRegister Vd, FloatRegister Vn, int quad) {
-    vdup(Vd, Vn, VELEM_SIZE_64, quad);
-  }
-#endif
-};
-
-
-#endif // CPU_ARM_VM_ASSEMBLER_ARM_64_HPP
diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
index b28403544db..c06357b48b7 100644
--- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
@@ -67,9 +67,6 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
 
   if (_info->deoptimize_on_exception()) {
-#ifdef AARCH64
-    __ NOT_TESTED();
-#endif
     __ call(Runtime1::entry_for(Runtime1::predicate_failed_trap_id), relocInfo::runtime_call_type);
     ce->add_call_info_here(_info);
     ce->verify_oop_map(_info);
@@ -86,9 +83,6 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
   }
 
   if (_throw_index_out_of_bounds_exception) {
-#ifdef AARCH64
-    __ NOT_TESTED();
-#endif
     __ call(Runtime1::entry_for(Runtime1::throw_index_exception_id), relocInfo::runtime_call_type);
   } else {
     __ str(_array->as_pointer_register(), Address(SP, BytesPerWord)); // ??? Correct offset? Correct instruction?
@@ -208,16 +202,12 @@ void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
   const Register lock_reg = _lock_reg->as_pointer_register();
 
   ce->verify_reserved_argument_area_size(2);
-#ifdef AARCH64
-  __ stp(obj_reg, lock_reg, Address(SP));
-#else
   if (obj_reg < lock_reg) {
     __ stmia(SP, RegisterSet(obj_reg) | RegisterSet(lock_reg));
   } else {
     __ str(obj_reg, Address(SP));
     __ str(lock_reg, Address(SP, BytesPerWord));
   }
-#endif // AARCH64
 
   Runtime1::StubID enter_id = ce->compilation()->has_fpu_code() ?
                               Runtime1::monitorenter_id :
@@ -259,7 +249,7 @@ void PatchingStub::align_patch_site(MacroAssembler* masm) {
 }
 
 void PatchingStub::emit_code(LIR_Assembler* ce) {
-  const int patchable_instruction_offset = AARCH64_ONLY(NativeInstruction::instruction_size) NOT_AARCH64(0);
+  const int patchable_instruction_offset = 0;
 
   assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF,
          "not enough room for call");
@@ -267,31 +257,17 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
   Label call_patch;
   bool is_load = (_id == load_klass_id) || (_id == load_mirror_id) || (_id == load_appendix_id);
 
-#ifdef AARCH64
-  assert(nativeInstruction_at(_pc_start)->is_nop(), "required for MT safe patching");
 
-  // Same alignment of reg2mem code and PatchingStub code. Required to make copied bind_literal() code properly aligned.
-  __ align(wordSize);
-#endif // AARCH64
-
-  if (is_load NOT_AARCH64(&& !VM_Version::supports_movw())) {
+  if (is_load && !VM_Version::supports_movw()) {
     address start = __ pc();
 
     // The following sequence duplicates code provided in MacroAssembler::patchable_mov_oop()
     // without creating relocation info entry.
-#ifdef AARCH64
-    // Extra nop for MT safe patching
-    __ nop();
-#endif // AARCH64
 
     assert((__ pc() - start) == patchable_instruction_offset, "should be");
-#ifdef AARCH64
-    __ ldr(_obj, __ pc());
-#else
     __ ldr(_obj, Address(PC));
     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
     __ nop();
-#endif // AARCH64
 
 #ifdef ASSERT
     for (int i = 0; i < _bytes_to_copy; i++) {
diff --git a/src/hotspot/cpu/arm/c1_Defs_arm.hpp b/src/hotspot/cpu/arm/c1_Defs_arm.hpp
index 665d5682e56..f2baedf8593 100644
--- a/src/hotspot/cpu/arm/c1_Defs_arm.hpp
+++ b/src/hotspot/cpu/arm/c1_Defs_arm.hpp
@@ -47,9 +47,9 @@ enum {
 
 // registers
 enum {
-  pd_nof_cpu_regs_frame_map             = AARCH64_ONLY(33) NOT_AARCH64(16), // number of registers used during code emission
-  pd_nof_caller_save_cpu_regs_frame_map = AARCH64_ONLY(27) NOT_AARCH64(10), // number of registers killed by calls
-  pd_nof_cpu_regs_reg_alloc             = AARCH64_ONLY(27) NOT_AARCH64(10), // number of registers that are visible to register allocator (including Rheap_base which is visible only if compressed pointers are not enabled)
+  pd_nof_cpu_regs_frame_map             = 16, // number of registers used during code emission
+  pd_nof_caller_save_cpu_regs_frame_map = 10, // number of registers killed by calls
+  pd_nof_cpu_regs_reg_alloc             = 10, // number of registers that are visible to register allocator (including Rheap_base which is visible only if compressed pointers are not enabled)
   pd_nof_cpu_regs_linearscan = pd_nof_cpu_regs_frame_map,                   // number of registers visible to linear scan
   pd_nof_cpu_regs_processed_in_linearscan = pd_nof_cpu_regs_reg_alloc + 1,  // number of registers processed in linear scan; includes LR as it is used as temporary register in c1_LIRGenerator_arm
   pd_first_cpu_reg = 0,
@@ -57,7 +57,7 @@ enum {
 
   pd_nof_fpu_regs_frame_map             = VFP(32) SOFT(0),                               // number of float registers used during code emission
   pd_nof_caller_save_fpu_regs_frame_map = VFP(32) SOFT(0),                               // number of float registers killed by calls
-  pd_nof_fpu_regs_reg_alloc             = AARCH64_ONLY(32) NOT_AARCH64(VFP(30) SOFT(0)), // number of float registers that are visible to register allocator
+  pd_nof_fpu_regs_reg_alloc             = VFP(30) SOFT(0), // number of float registers that are visible to register allocator
   pd_nof_fpu_regs_linearscan            = pd_nof_fpu_regs_frame_map,                     // number of float registers visible to linear scan
   pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
   pd_last_fpu_reg  = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - 1,
@@ -74,11 +74,7 @@ enum {
   pd_float_saved_as_double = false
 };
 
-#ifdef AARCH64
-#define PATCHED_ADDR 0xff8
-#else
 #define PATCHED_ADDR (204)
-#endif
 #define CARDTABLEBARRIERSET_POST_BARRIER_HELPER
 #define GENERATE_ADDRESS_IS_PREFERRED
 
diff --git a/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp b/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp
index 11d0e070538..bd74c3f83b4 100644
--- a/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp
@@ -49,9 +49,6 @@ LIR_Opr FrameMap::R3_metadata_opr;
 LIR_Opr FrameMap::R4_metadata_opr;
 LIR_Opr FrameMap::R5_metadata_opr;
 
-#ifdef AARCH64
-LIR_Opr FrameMap::ZR_opr;
-#endif // AARCH64
 
 LIR_Opr FrameMap::LR_opr;
 LIR_Opr FrameMap::LR_oop_opr;
@@ -82,12 +79,7 @@ LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) {
   } else if (r_1->is_Register()) {
     Register reg = r_1->as_Register();
     if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
-#ifdef AARCH64
-      assert(r_1->next() == r_2, "should be the same");
-      opr = as_long_opr(reg);
-#else
       opr = as_long_opr(reg, r_2->as_Register());
-#endif
     } else if (is_reference_type(type)) {
       opr = as_oop_opr(reg);
     } else if (type == T_METADATA) {
@@ -117,20 +109,10 @@ void FrameMap::initialize() {
   int rnum = 0;
 
   // Registers used for allocation
-#ifdef AARCH64
-  assert(Rthread == R28 && Rheap_base == R27 && Rtemp == R16, "change the code here");
-  for (i = 0; i < 16; i++) {
-    map_register(rnum++, as_Register(i));
-  }
-  for (i = 17; i < 28; i++) {
-    map_register(rnum++, as_Register(i));
-  }
-#else
   assert(Rthread == R10 && Rtemp == R12, "change the code here");
   for (i = 0; i < 10; i++) {
     map_register(rnum++, as_Register(i));
   }
-#endif // AARCH64
   assert(rnum == pd_nof_cpu_regs_reg_alloc, "should be");
 
   // Registers not used for allocation
@@ -141,11 +123,7 @@ void FrameMap::initialize() {
   map_register(rnum++, Rthread);
   map_register(rnum++, FP); // ARM32: R7 or R11
   map_register(rnum++, SP);
-#ifdef AARCH64
-  map_register(rnum++, ZR);
-#else
   map_register(rnum++, PC);
-#endif
   assert(rnum == pd_nof_cpu_regs_frame_map, "should be");
 
   _init_done = true;
@@ -157,9 +135,6 @@ void FrameMap::initialize() {
   R4_opr  = as_opr(R4);   R4_oop_opr = as_oop_opr(R4);    R4_metadata_opr = as_metadata_opr(R4);
   R5_opr  = as_opr(R5);   R5_oop_opr = as_oop_opr(R5);    R5_metadata_opr = as_metadata_opr(R5);
 
-#ifdef AARCH64
-  ZR_opr = as_opr(ZR);
-#endif // AARCH64
 
   LR_opr      = as_opr(LR);
   LR_oop_opr  = as_oop_opr(LR);
@@ -171,11 +146,6 @@ void FrameMap::initialize() {
   // LIR operands for result
   Int_result_opr = R0_opr;
   Object_result_opr = R0_oop_opr;
-#ifdef AARCH64
-  Long_result_opr = as_long_opr(R0);
-  Float_result_opr = as_float_opr(S0);
-  Double_result_opr = as_double_opr(D0);
-#else
   Long_result_opr = as_long_opr(R0, R1);
 #ifdef __ABI_HARD__
   Float_result_opr = as_float_opr(S0);
@@ -184,7 +154,6 @@ void FrameMap::initialize() {
   Float_result_opr = LIR_OprFact::single_softfp(0);
   Double_result_opr = LIR_OprFact::double_softfp(0, 1);
 #endif // __ABI_HARD__
-#endif // AARCH64
 
   Exception_oop_opr = as_oop_opr(Rexception_obj);
   Exception_pc_opr = as_opr(Rexception_pc);
@@ -224,7 +193,7 @@ bool FrameMap::validate_frame() {
     }
     java_index += type2size[opr->type()];
   }
-  return max_offset < AARCH64_ONLY(16384) NOT_AARCH64(4096); // TODO-AARCH64 check that LIRAssembler does not generate load/store of byte and half-word with SP as address base
+  return max_offset < 4096;
 }
 
 VMReg FrameMap::fpu_regname(int n) {
diff --git a/src/hotspot/cpu/arm/c1_FrameMap_arm.hpp b/src/hotspot/cpu/arm/c1_FrameMap_arm.hpp
index efb2acfb837..2309224f148 100644
--- a/src/hotspot/cpu/arm/c1_FrameMap_arm.hpp
+++ b/src/hotspot/cpu/arm/c1_FrameMap_arm.hpp
@@ -54,9 +54,6 @@
   static LIR_Opr R4_metadata_opr;
   static LIR_Opr R5_metadata_opr;
 
-#ifdef AARCH64
-  static LIR_Opr ZR_opr;
-#endif // AARCH64
 
   static LIR_Opr LR_opr;
   static LIR_Opr LR_oop_opr;
@@ -75,19 +72,6 @@
   static LIR_Opr Exception_oop_opr;
   static LIR_Opr Exception_pc_opr;
 
-#ifdef AARCH64
-  static LIR_Opr as_long_opr(Register r) {
-    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
-  }
-
-  static LIR_Opr as_pointer_opr(Register r) {
-    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
-  }
-
-  static LIR_Opr as_double_opr(FloatRegister r) {
-    return LIR_OprFact::double_fpu(r->encoding());
-  }
-#else
   static LIR_Opr as_long_opr(Register r, Register r2) {
     return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r2));
   }
@@ -99,7 +83,6 @@
   static LIR_Opr as_double_opr(FloatRegister r) {
     return LIR_OprFact::double_fpu(r->encoding(), r->successor()->encoding());
   }
-#endif
 
   static LIR_Opr as_float_opr(FloatRegister r) {
     return LIR_OprFact::single_fpu(r->encoding());
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
index f0a7229aa18..89e0c9ab7de 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
@@ -127,9 +127,6 @@ void LIR_Assembler::pop(LIR_Opr opr) {
 Address LIR_Assembler::as_Address(LIR_Address* addr) {
   Register base = addr->base()->as_pointer_register();
 
-#ifdef AARCH64
-  int align = exact_log2(type2aelembytes(addr->type(), true));
-#endif
 
   if (addr->index()->is_illegal() || addr->index()->is_constant()) {
     int offset = addr->disp();
@@ -137,16 +134,9 @@ Address LIR_Assembler::as_Address(LIR_Address* addr) {
       offset += addr->index()->as_constant_ptr()->as_jint() << addr->scale();
     }
 
-#ifdef AARCH64
-    if (!Assembler::is_unsigned_imm_in_range(offset, 12, align) && !Assembler::is_imm_in_range(offset, 9, 0)) {
-      BAILOUT_("offset not in range", Address(base));
-    }
-    assert(UseUnalignedAccesses || (offset & right_n_bits(align)) == 0, "offset should be aligned");
-#else
     if ((offset <= -4096) || (offset >= 4096)) {
       BAILOUT_("offset not in range", Address(base));
     }
-#endif // AARCH64
 
     return Address(base, offset);
 
@@ -154,44 +144,21 @@ Address LIR_Assembler::as_Address(LIR_Address* addr) {
     assert(addr->disp() == 0, "can't have both");
     int scale = addr->scale();
 
-#ifdef AARCH64
-    assert((scale == 0) || (scale == align), "scale should be zero or equal to embedded shift");
-
-    bool is_index_extended = (addr->index()->type() == T_INT);
-    if (is_index_extended) {
-      assert(addr->index()->is_single_cpu(), "should be");
-      return Address(base, addr->index()->as_register(), ex_sxtw, scale);
-    } else {
-      assert(addr->index()->is_double_cpu(), "should be");
-      return Address(base, addr->index()->as_register_lo(), ex_lsl, scale);
-    }
-#else
     assert(addr->index()->is_single_cpu(), "should be");
     return scale >= 0 ? Address(base, addr->index()->as_register(), lsl, scale) :
                         Address(base, addr->index()->as_register(), lsr, -scale);
-#endif // AARCH64
   }
 }
 
 Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
-#ifdef AARCH64
-  ShouldNotCallThis(); // Not used on AArch64
-  return Address();
-#else
   Address base = as_Address(addr);
   assert(base.index() == noreg, "must be");
   if (base.disp() + BytesPerWord >= 4096) { BAILOUT_("offset not in range", Address(base.base(),0)); }
   return Address(base.base(), base.disp() + BytesPerWord);
-#endif // AARCH64
 }
 
 Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
-#ifdef AARCH64
-  ShouldNotCallThis(); // Not used on AArch64
-  return Address();
-#else
   return as_Address(addr);
-#endif // AARCH64
 }
 
 
@@ -327,13 +294,8 @@ int LIR_Assembler::emit_deopt_handler() {
   int offset = code_offset();
 
   __ mov_relative_address(LR, __ pc());
-#ifdef AARCH64
-  __ raw_push(LR, LR);
-  __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, Rtemp);
-#else
   __ push(LR); // stub expects LR to be saved
   __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg);
-#endif // AARCH64
 
   assert(code_offset() - offset <= deopt_handler_size(), "overflow");
   __ end_a_stub();
@@ -347,7 +309,6 @@ void LIR_Assembler::return_op(LIR_Opr result) {
   __ remove_frame(initial_frame_size_in_bytes());
 
   // mov_slow here is usually one or two instruction
-  // TODO-AARCH64 3 instructions on AArch64, so try to load polling page by ldr_literal
   __ mov_address(Rtemp, os::get_polling_page(), symbolic_Relocation::polling_page_reference);
   __ relocate(relocInfo::poll_return_type);
   __ ldr(Rtemp, Address(Rtemp));
@@ -386,12 +347,8 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod
 
     case T_LONG:
       assert(patch_code == lir_patch_none, "no patching handled here");
-#ifdef AARCH64
-      __ mov_slow(dest->as_pointer_register(), (intptr_t)c->as_jlong());
-#else
       __ mov_slow(dest->as_register_lo(), c->as_jint_lo());
       __ mov_slow(dest->as_register_hi(), c->as_jint_hi());
-#endif // AARCH64
       break;
 
     case T_OBJECT:
@@ -414,12 +371,8 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod
       if (dest->is_single_fpu()) {
         __ mov_float(dest->as_float_reg(), c->as_jfloat());
       } else {
-#ifdef AARCH64
-        ShouldNotReachHere();
-#else
         // Simple getters can return float constant directly into r0
         __ mov_slow(dest->as_register(), c->as_jint_bits());
-#endif // AARCH64
       }
       break;
 
@@ -427,13 +380,9 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod
       if (dest->is_double_fpu()) {
         __ mov_double(dest->as_double_reg(), c->as_jdouble());
       } else {
-#ifdef AARCH64
-        ShouldNotReachHere();
-#else
         // Simple getters can return double constant directly into r1r0
         __ mov_slow(dest->as_register_lo(), c->as_jint_lo_bits());
         __ mov_slow(dest->as_register_hi(), c->as_jint_hi_bits());
-#endif // AARCH64
       }
       break;
 
@@ -466,17 +415,12 @@ void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
 
     case T_LONG:  // fall through
     case T_DOUBLE:
-#ifdef AARCH64
-      __ mov_slow(Rtemp, c->as_jlong_bits());
-      __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix()));
-#else
       __ mov_slow(Rtemp, c->as_jint_lo_bits());
       __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes));
       if (c->as_jint_hi_bits() != c->as_jint_lo_bits()) {
         __ mov_slow(Rtemp, c->as_jint_hi_bits());
       }
       __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes));
-#endif // AARCH64
       break;
 
     default:
@@ -486,49 +430,14 @@ void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
 
 void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type,
                               CodeEmitInfo* info, bool wide) {
-#ifdef AARCH64
-  assert((src->as_constant_ptr()->type() == T_OBJECT && src->as_constant_ptr()->as_jobject() == NULL) ||
-         (src->as_constant_ptr()->type() == T_INT && src->as_constant_ptr()->as_jint() == 0) ||
-         (src->as_constant_ptr()->type() == T_LONG && src->as_constant_ptr()->as_jlong() == 0) ||
-         (src->as_constant_ptr()->type() == T_FLOAT && src->as_constant_ptr()->as_jint_bits() == 0) ||
-         (src->as_constant_ptr()->type() == T_DOUBLE && src->as_constant_ptr()->as_jlong_bits() == 0),
-        "cannot handle otherwise");
-  assert(dest->as_address_ptr()->type() == type, "should be");
-
-  Address addr = as_Address(dest->as_address_ptr());
-  int null_check_offset = code_offset();
-  switch (type) {
-    case T_OBJECT:  // fall through
-    case T_ARRAY:
-        if (UseCompressedOops && !wide) {
-          __ str_w(ZR, addr);
-        } else {
-          __ str(ZR, addr);
-        }
-        break;
-    case T_ADDRESS: // fall through
-    case T_DOUBLE:  // fall through
-    case T_LONG:    __ str(ZR, addr);   break;
-    case T_FLOAT:   // fall through
-    case T_INT:     __ str_w(ZR, addr); break;
-    case T_BOOLEAN: // fall through
-    case T_BYTE:    __ strb(ZR, addr);  break;
-    case T_CHAR:    // fall through
-    case T_SHORT:   __ strh(ZR, addr);  break;
-    default: ShouldNotReachHere();
-  }
-#else
   assert((src->as_constant_ptr()->type() == T_OBJECT && src->as_constant_ptr()->as_jobject() == NULL),"cannot handle otherwise");
   __ mov(Rtemp, 0);
 
   int null_check_offset = code_offset();
   __ str(Rtemp, as_Address(dest->as_address_ptr()));
-#endif // AARCH64
 
   if (info != NULL) {
-#ifndef AARCH64
     assert(false, "arm32 didn't support this before, investigate if bug");
-#endif
     add_debug_info_for_null_check(null_check_offset, info);
   }
 }
@@ -539,27 +448,17 @@ void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
   if (src->is_single_cpu()) {
     if (dest->is_single_cpu()) {
       move_regs(src->as_register(), dest->as_register());
-#ifdef AARCH64
-    } else if (dest->is_double_cpu()) {
-      assert ((src->type() == T_OBJECT) || (src->type() == T_ARRAY) || (src->type() == T_ADDRESS), "invalid src type");
-      move_regs(src->as_register(), dest->as_register_lo());
-#else
     } else if (dest->is_single_fpu()) {
       __ fmsr(dest->as_float_reg(), src->as_register());
-#endif // AARCH64
     } else {
       ShouldNotReachHere();
     }
   } else if (src->is_double_cpu()) {
-#ifdef AARCH64
-    move_regs(src->as_register_lo(), dest->as_register_lo());
-#else
     if (dest->is_double_cpu()) {
       __ long_move(dest->as_register_lo(), dest->as_register_hi(), src->as_register_lo(), src->as_register_hi());
     } else {
       __ fmdrr(dest->as_double_reg(), src->as_register_lo(), src->as_register_hi());
     }
-#endif // AARCH64
   } else if (src->is_single_fpu()) {
     if (dest->is_single_fpu()) {
       __ mov_float(dest->as_float_reg(), src->as_float_reg());
@@ -572,11 +471,7 @@ void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
     if (dest->is_double_fpu()) {
       __ mov_double(dest->as_double_reg(), src->as_double_reg());
     } else if (dest->is_double_cpu()) {
-#ifdef AARCH64
-      __ fmov_xd(dest->as_register_lo(), src->as_double_reg());
-#else
       __ fmrrd(dest->as_register_lo(), dest->as_register_hi(), src->as_double_reg());
-#endif // AARCH64
     } else {
       ShouldNotReachHere();
     }
@@ -593,12 +488,10 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po
     frame_map()->address_for_slot(dest->single_stack_ix()) :
     frame_map()->address_for_slot(dest->double_stack_ix());
 
-#ifndef AARCH64
   assert(lo_word_offset_in_bytes == 0 && hi_word_offset_in_bytes == 4, "little ending");
   if (src->is_single_fpu() || src->is_double_fpu()) {
     if (addr.disp() >= 1024) { BAILOUT("Too exotic case to handle here"); }
   }
-#endif // !AARCH64
 
   if (src->is_single_cpu()) {
     switch (type) {
@@ -613,9 +506,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po
     }
   } else if (src->is_double_cpu()) {
     __ str(src->as_register_lo(), addr);
-#ifndef AARCH64
     __ str(src->as_register_hi(), frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes));
-#endif // !AARCH64
   } else if (src->is_single_fpu()) {
     __ str_float(src->as_float_reg(), addr);
   } else if (src->is_double_fpu()) {
@@ -636,15 +527,7 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type,
 
   PatchingStub* patch = NULL;
   if (needs_patching) {
-#ifdef AARCH64
-    // Same alignment of reg2mem code and PatchingStub code. Required to make copied bind_literal() code properly aligned.
-    __ align(wordSize);
-#endif
     patch = new PatchingStub(_masm, PatchingStub::access_field_id);
-#ifdef AARCH64
-    // Extra nop for MT safe patching
-    __ nop();
-#endif // AARCH64
   }
 
   int null_check_offset = code_offset();
@@ -653,24 +536,13 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type,
     case T_ARRAY:
     case T_OBJECT:
       if (UseCompressedOops && !wide) {
-#ifdef AARCH64
-        const Register temp_src = Rtemp;
-        assert_different_registers(temp_src, src->as_register());
-        __ encode_heap_oop(temp_src, src->as_register());
-        null_check_offset = code_offset();
-        __ str_32(temp_src, as_Address(to_addr));
-#else
         ShouldNotReachHere();
-#endif // AARCH64
       } else {
         __ str(src->as_register(), as_Address(to_addr));
       }
       break;
 
     case T_ADDRESS:
-#ifdef AARCH64
-    case T_LONG:
-#endif // AARCH64
       __ str(src->as_pointer_register(), as_Address(to_addr));
       break;
 
@@ -691,17 +563,6 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type,
       __ str_32(src->as_register(), as_Address(to_addr));
       break;
 
-#ifdef AARCH64
-
-    case T_FLOAT:
-      __ str_s(src->as_float_reg(), as_Address(to_addr));
-      break;
-
-    case T_DOUBLE:
-      __ str_d(src->as_double_reg(), as_Address(to_addr));
-      break;
-
-#else // AARCH64
 
 #ifdef __SOFTFP__
     case T_DOUBLE:
@@ -768,7 +629,6 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type,
       break;
 #endif // __SOFTFP__
 
-#endif // AARCH64
 
     default:
       ShouldNotReachHere();
@@ -796,12 +656,10 @@ void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
     frame_map()->address_for_slot(src->single_stack_ix()) :
     frame_map()->address_for_slot(src->double_stack_ix());
 
-#ifndef AARCH64
   assert(lo_word_offset_in_bytes == 0 && hi_word_offset_in_bytes == 4, "little ending");
   if (dest->is_single_fpu() || dest->is_double_fpu()) {
     if (addr.disp() >= 1024) { BAILOUT("Too exotic case to handle here"); }
   }
-#endif // !AARCH64
 
   if (dest->is_single_cpu()) {
     switch (type) {
@@ -819,9 +677,7 @@ void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
     }
   } else if (dest->is_double_cpu()) {
     __ ldr(dest->as_register_lo(), addr);
-#ifndef AARCH64
     __ ldr(dest->as_register_hi(), frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes));
-#endif // !AARCH64
   } else if (dest->is_single_fpu()) {
     __ ldr_float(dest->as_float_reg(), addr);
   } else if (dest->is_double_fpu()) {
@@ -856,12 +712,8 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
     assert(src->is_double_stack(), "must be");
     __ ldr(Rtemp, frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes));
     __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes));
-#ifdef AARCH64
-    assert(lo_word_offset_in_bytes == 0, "adjust this code");
-#else
     __ ldr(Rtemp, frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes));
     __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes));
-#endif // AARCH64
   }
 }
 
@@ -878,10 +730,6 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type,
   PatchingStub* patch = NULL;
   if (patch_code != lir_patch_none) {
     patch = new PatchingStub(_masm, PatchingStub::access_field_id);
-#ifdef AARCH64
-    // Extra nop for MT safe patching
-    __ nop();
-#endif // AARCH64
   }
   if (info != NULL) {
     add_debug_info_for_null_check_here(info);
@@ -905,14 +753,10 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type,
       }
       break;
 
-#ifdef AARCH64
-    case T_LONG:
-#else
     case T_INT:
 #ifdef __SOFTFP__
     case T_FLOAT:
 #endif // __SOFTFP__
-#endif // AARCH64
       __ ldr(dest->as_pointer_register(), as_Address(addr));
       break;
 
@@ -932,21 +776,6 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type,
       __ ldrsh(dest->as_register(), as_Address(addr));
       break;
 
-#ifdef AARCH64
-
-    case T_INT:
-      __ ldr_w(dest->as_register(), as_Address(addr));
-      break;
-
-    case T_FLOAT:
-      __ ldr_s(dest->as_float_reg(), as_Address(addr));
-      break;
-
-    case T_DOUBLE:
-      __ ldr_d(dest->as_double_reg(), as_Address(addr));
-      break;
-
-#else // AARCH64
 
 #ifdef __SOFTFP__
     case T_DOUBLE:
@@ -1013,7 +842,6 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type,
       break;
 #endif // __SOFTFP__
 
-#endif // AARCH64
 
     default:
       ShouldNotReachHere();
@@ -1027,23 +855,6 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type,
     patching_epilog(patch, patch_code, base_reg, info);
   }
 
-#ifdef AARCH64
-  switch (type) {
-    case T_ARRAY:
-    case T_OBJECT:
-      if (UseCompressedOops && !wide) {
-        __ decode_heap_oop(dest->as_register());
-      }
-      __ verify_oop(dest->as_register());
-      break;
-
-    case T_ADDRESS:
-      if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {
-        __ decode_klass_not_null(dest->as_register());
-      }
-      break;
-  }
-#endif // AARCH64
 }
 
 
@@ -1070,48 +881,13 @@ void LIR_Assembler::emit_op3(LIR_Op3* op) {
       // x/0x80000000 is a special case, since dividend is a power of two, but is negative.
       // The only possible result values are 0 and 1, with 1 only for dividend == divisor == 0x80000000.
       __ cmp_32(left, c);
-#ifdef AARCH64
-      __ cset(dest, eq);
-#else
       __ mov(dest, 0, ne);
       __ mov(dest, 1, eq);
-#endif // AARCH64
     }
   } else {
-#ifdef AARCH64
-    Register left  = op->in_opr1()->as_pointer_register();
-    Register right = op->in_opr2()->as_pointer_register();
-    Register dest  = op->result_opr()->as_pointer_register();
-
-    switch (op->code()) {
-      case lir_idiv:
-        if (is_32) {
-          __ sdiv_w(dest, left, right);
-        } else {
-          __ sdiv(dest, left, right);
-        }
-        break;
-      case lir_irem: {
-        Register tmp = op->in_opr3()->as_pointer_register();
-        assert_different_registers(left, tmp);
-        assert_different_registers(right, tmp);
-        if (is_32) {
-          __ sdiv_w(tmp, left, right);
-          __ msub_w(dest, right, tmp, left);
-        } else {
-          __ sdiv(tmp, left, right);
-          __ msub(dest, right, tmp, left);
-        }
-        break;
-      }
-      default:
-        ShouldNotReachHere();
-    }
-#else
     assert(op->code() == lir_idiv || op->code() == lir_irem, "unexpected op3");
     __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::runtime_call_type);
     add_debug_info_for_div0_here(op->info());
-#endif // AARCH64
   }
 }
 
@@ -1128,9 +904,7 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
   assert (op->code() != lir_cond_float_branch, "this should be impossible");
 #else
   if (op->code() == lir_cond_float_branch) {
-#ifndef AARCH64
     __ fmstat();
-#endif // !AARCH64
     __ b(*(op->ublock()->label()), vs);
   }
 #endif // __SOFTFP__
@@ -1157,12 +931,8 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
 
   switch (op->bytecode()) {
     case Bytecodes::_i2l:
-#ifdef AARCH64
-      __ sign_extend(dest->as_register_lo(), src->as_register(), 32);
-#else
       move_regs(src->as_register(), dest->as_register_lo());
       __ mov(dest->as_register_hi(), AsmOperand(src->as_register(), asr, 31));
-#endif // AARCH64
       break;
     case Bytecodes::_l2i:
       move_regs(src->as_register_lo(), dest->as_register());
@@ -1183,51 +953,21 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
       __ convert_d2f(dest->as_float_reg(), src->as_double_reg());
       break;
     case Bytecodes::_i2f:
-#ifdef AARCH64
-      __ scvtf_sw(dest->as_float_reg(), src->as_register());
-#else
       __ fmsr(Stemp, src->as_register());
       __ fsitos(dest->as_float_reg(), Stemp);
-#endif // AARCH64
       break;
     case Bytecodes::_i2d:
-#ifdef AARCH64
-      __ scvtf_dw(dest->as_double_reg(), src->as_register());
-#else
       __ fmsr(Stemp, src->as_register());
       __ fsitod(dest->as_double_reg(), Stemp);
-#endif // AARCH64
       break;
     case Bytecodes::_f2i:
-#ifdef AARCH64
-      __ fcvtzs_ws(dest->as_register(), src->as_float_reg());
-#else
       __ ftosizs(Stemp, src->as_float_reg());
       __ fmrs(dest->as_register(), Stemp);
-#endif // AARCH64
       break;
     case Bytecodes::_d2i:
-#ifdef AARCH64
-      __ fcvtzs_wd(dest->as_register(), src->as_double_reg());
-#else
       __ ftosizd(Stemp, src->as_double_reg());
       __ fmrs(dest->as_register(), Stemp);
-#endif // AARCH64
-      break;
-#ifdef AARCH64
-    case Bytecodes::_l2f:
-      __ scvtf_sx(dest->as_float_reg(), src->as_register_lo());
-      break;
-    case Bytecodes::_l2d:
-      __ scvtf_dx(dest->as_double_reg(), src->as_register_lo());
-      break;
-    case Bytecodes::_f2l:
-      __ fcvtzs_xs(dest->as_register_lo(), src->as_float_reg());
-      break;
-    case Bytecodes::_d2l:
-      __ fcvtzs_xd(dest->as_register_lo(), src->as_double_reg());
       break;
-#endif // AARCH64
     default:
       ShouldNotReachHere();
   }
@@ -1333,11 +1073,7 @@ void LIR_Assembler::typecheck_profile_helper1(ciMethod* method, int bci,
   assert_different_registers(obj, mdo, data_val);
   setup_md_access(method, bci, md, data, mdo_offset_bias);
   Label not_null;
-#ifdef AARCH64
-  __ cbnz(obj, not_null);
-#else
   __ b(not_null, ne);
-#endif // AARCH64
   __ mov_metadata(mdo, md->constant_encoding());
   if (mdo_offset_bias > 0) {
     __ mov_slow(data_val, mdo_offset_bias);
@@ -1379,13 +1115,9 @@ void LIR_Assembler::typecheck_profile_helper2(ciMethodData* md, ciProfileData* d
   __ b(*failure);
 }
 
-// Sets `res` to true, if `cond` holds. On AArch64 also sets `res` to false if `cond` does not hold.
+// Sets `res` to 1 (true) if `cond` holds; otherwise `res` is left unchanged, so callers must preset it.
 static void set_instanceof_result(MacroAssembler* _masm, Register res, AsmCondition cond) {
-#ifdef AARCH64
-  __ cset(res, cond);
-#else
   __ mov(res, 1, cond);
-#endif // AARCH64
 }
 
 
@@ -1412,9 +1144,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
       Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
 
       if (op->should_profile()) {
-#ifndef AARCH64
         __ cmp(value, 0);
-#endif // !AARCH64
         typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, value, k_RInfo, Rtemp, &done);
       } else {
         __ cbz(value, done);
@@ -1476,57 +1206,6 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
       Label *failure_target = op->should_profile() ? &profile_cast_failure : op->stub()->entry();
       Label *success_target = op->should_profile() ? &profile_cast_success : &done;
 
-#ifdef AARCH64
-      move_regs(obj, res);
-      if (op->should_profile()) {
-        typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done);
-      } else {
-        __ cbz(obj, done);
-      }
-      if (k->is_loaded()) {
-        __ mov_metadata(k_RInfo, k->constant_encoding());
-      } else {
-        if (res != obj) {
-          op->info_for_patch()->add_register_oop(FrameMap::as_oop_opr(res));
-        }
-        klass2reg_with_patching(k_RInfo, op->info_for_patch());
-      }
-      __ load_klass(klass_RInfo, res);
-
-      if (op->fast_check()) {
-        __ cmp(klass_RInfo, k_RInfo);
-        __ b(*failure_target, ne);
-      } else if (k->is_loaded()) {
-        __ ldr(Rtemp, Address(klass_RInfo, k->super_check_offset()));
-        if (in_bytes(Klass::secondary_super_cache_offset()) != (int) k->super_check_offset()) {
-          __ cmp(Rtemp, k_RInfo);
-          __ b(*failure_target, ne);
-        } else {
-          __ cmp(klass_RInfo, k_RInfo);
-          __ cond_cmp(Rtemp, k_RInfo, ne);
-          __ b(*success_target, eq);
-          assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
-          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-          __ cbz(R0, *failure_target);
-        }
-      } else {
-        __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));
-        // check for immediate positive hit
-        __ ldr(Rtemp, Address(klass_RInfo, Rtemp));
-        __ cmp(klass_RInfo, k_RInfo);
-        __ cond_cmp(Rtemp, k_RInfo, ne);
-        __ b(*success_target, eq);
-        // check for immediate negative hit
-        __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset()));
-        __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset()));
-        __ b(*failure_target, ne);
-        // slow case
-        assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup");
-        __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-        __ cbz(R0, *failure_target);
-      }
-
-#else // AARCH64
 
       __ movs(res, obj);
       if (op->should_profile()) {
@@ -1581,7 +1260,6 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ cbz(R0, *failure_target);
       }
-#endif // AARCH64
 
       if (op->should_profile()) {
         Register mdo  = klass_RInfo, recv = k_RInfo, tmp1 = Rtemp;
@@ -1611,20 +1289,12 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
       Label *failure_target = op->should_profile() ? &profile_cast_failure : &done;
       Label *success_target = op->should_profile() ? &profile_cast_success : &done;
 
-#ifdef AARCH64
-      move_regs(obj, res);
-#else
       __ movs(res, obj);
-#endif // AARCH64
 
       if (op->should_profile()) {
         typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done);
       } else {
-#ifdef AARCH64
-        __ cbz(obj, done); // If obj == NULL, res is false
-#else
         __ b(done, eq);
-#endif // AARCH64
       }
 
       if (k->is_loaded()) {
@@ -1635,11 +1305,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
       }
       __ load_klass(klass_RInfo, res);
 
-#ifndef AARCH64
       if (!op->should_profile()) {
         __ mov(res, 0);
       }
-#endif // !AARCH64
 
       if (op->fast_check()) {
         __ cmp(klass_RInfo, k_RInfo);
@@ -1677,21 +1345,11 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
         // check for immediate positive hit
         __ cmp(klass_RInfo, k_RInfo);
         if (!op->should_profile()) {
-#ifdef AARCH64
-          // TODO-AARCH64 check if separate conditional branch is more efficient than ldr+cond_cmp
-          __ ldr(res, Address(klass_RInfo, Rtemp));
-#else
           __ ldr(res, Address(klass_RInfo, Rtemp), ne);
-#endif // AARCH64
           __ cond_cmp(res, k_RInfo, ne);
           set_instanceof_result(_masm, res, eq);
         } else {
-#ifdef AARCH64
-          // TODO-AARCH64 check if separate conditional branch is more efficient than ldr+cond_cmp
-          __ ldr(Rtemp, Address(klass_RInfo, Rtemp));
-#else
           __ ldr(Rtemp, Address(klass_RInfo, Rtemp), ne);
-#endif // AARCH64
           __ cond_cmp(Rtemp, k_RInfo, ne);
         }
         __ b(*success_target, eq);
@@ -1701,11 +1359,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
         }
         __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset()));
         if (!op->should_profile()) {
-#ifdef AARCH64
-          __ mov(res, 0);
-#else
           __ mov(res, 0, ne);
-#endif // AARCH64
         }
         __ b(*failure_target, ne);
         // slow case
@@ -1747,41 +1401,6 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
   //   } else {
   //     dest = 0;
   //   }
-#ifdef AARCH64
-  Label retry, done;
-  Register addr = op->addr()->as_pointer_register();
-  Register cmpval = op->cmp_value()->as_pointer_register();
-  Register newval = op->new_value()->as_pointer_register();
-  Register dest = op->result_opr()->as_pointer_register();
-  assert_different_registers(dest, addr, cmpval, newval, Rtemp);
-
-  if (UseCompressedOops && op->code() == lir_cas_obj) {
-    Register tmp1 = op->tmp1()->as_pointer_register();
-    Register tmp2 = op->tmp2()->as_pointer_register();
-    assert_different_registers(dest, addr, cmpval, newval, tmp1, tmp2, Rtemp);
-    __ encode_heap_oop(tmp1, cmpval); cmpval = tmp1;
-    __ encode_heap_oop(tmp2, newval); newval = tmp2;
-  }
-
-  __ mov(dest, ZR);
-  __ bind(retry);
-  if (((op->code() == lir_cas_obj) && !UseCompressedOops) || op->code() == lir_cas_long) {
-    __ ldaxr(Rtemp, addr);
-    __ cmp(Rtemp, cmpval);
-    __ b(done, ne);
-    __ stlxr(Rtemp, newval, addr);
-  } else if (((op->code() == lir_cas_obj) && UseCompressedOops) || op->code() == lir_cas_int) {
-    __ ldaxr_w(Rtemp, addr);
-    __ cmp_w(Rtemp, cmpval);
-    __ b(done, ne);
-    __ stlxr_w(Rtemp, newval, addr);
-  } else {
-    ShouldNotReachHere();
-  }
-  __ cbnz_w(Rtemp, retry);
-  __ mov(dest, 1);
-  __ bind(done);
-#else
   // FIXME: membar_release
   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp);
   Register addr = op->addr()->is_register() ?
@@ -1818,7 +1437,6 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
   } else {
     Unimplemented();
   }
-#endif // AARCH64
   // FIXME: is full membar really needed instead of just membar_acquire?
   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
 }
@@ -1841,36 +1459,6 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L
     }
   }
 
-#ifdef AARCH64
-
-  // TODO-AARCH64 implement it more efficiently
-
-  if (opr1->is_register()) {
-    reg2reg(opr1, result);
-  } else if (opr1->is_stack()) {
-    stack2reg(opr1, result, result->type());
-  } else if (opr1->is_constant()) {
-    const2reg(opr1, result, lir_patch_none, NULL);
-  } else {
-    ShouldNotReachHere();
-  }
-
-  Label skip;
-  __ b(skip, acond);
-
-  if (opr2->is_register()) {
-    reg2reg(opr2, result);
-  } else if (opr2->is_stack()) {
-    stack2reg(opr2, result, result->type());
-  } else if (opr2->is_constant()) {
-    const2reg(opr2, result, lir_patch_none, NULL);
-  } else {
-    ShouldNotReachHere();
-  }
-
-  __ bind(skip);
-
-#else
   for (;;) {                         // two iterations only
     if (opr1 == result) {
       // do nothing
@@ -1930,10 +1518,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L
     opr1 = opr2;
     acond = ncond;
   }
-#endif // AARCH64
 }
 
-#if defined(AARCH64) || defined(ASSERT)
+#ifdef ASSERT
 static int reg_size(LIR_Opr op) {
   switch (op->type()) {
   case T_FLOAT:
@@ -1965,37 +1552,6 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
     int scale = addr->scale();
     AsmShift shift = lsl;
 
-#ifdef AARCH64
-    bool is_index_extended = reg_size(addr->base()) > reg_size(addr->index());
-    if (scale < 0) {
-      scale = -scale;
-      shift = lsr;
-    }
-    assert(shift == lsl || !is_index_extended, "could not have extend and right shift in one operand");
-    assert(0 <= scale && scale <= 63, "scale is too large");
-
-    if (is_index_extended) {
-      assert(scale <= 4, "scale is too large for add with extended register");
-      assert(addr->index()->is_single_cpu(), "should be");
-      assert(addr->index()->type() == T_INT, "should be");
-      assert(dest->is_double_cpu(), "should be");
-      assert(code == lir_add, "special case of add with extended register");
-
-      __ add(res, lreg, addr->index()->as_register(), ex_sxtw, scale);
-      return;
-    } else if (reg_size(dest) == BytesPerInt) {
-      assert(reg_size(addr->base()) == reg_size(addr->index()), "should be");
-      assert(reg_size(addr->base()) == reg_size(dest), "should be");
-
-      AsmOperand operand(addr->index()->as_pointer_register(), shift, scale);
-      switch (code) {
-        case lir_add: __ add_32(res, lreg, operand); break;
-        case lir_sub: __ sub_32(res, lreg, operand); break;
-        default: ShouldNotReachHere();
-      }
-      return;
-    }
-#endif // AARCH64
 
     assert(reg_size(addr->base()) == reg_size(addr->index()), "should be");
     assert(reg_size(addr->base()) == reg_size(dest), "should be");
@@ -2008,7 +1564,6 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
       default: ShouldNotReachHere();
     }
 
-#ifndef AARCH64
   } else if (left->is_address()) {
     assert(code == lir_sub && right->is_single_cpu(), "special case used by strength_reduce_multiply()");
     const LIR_Address* addr = left->as_address_ptr();
@@ -2016,15 +1571,9 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
     const Register rreg = right->as_register();
     assert(addr->base()->as_register() == rreg && addr->index()->is_register() && addr->disp() == 0, "must be");
     __ rsb(res, rreg, AsmOperand(addr->index()->as_register(), lsl, addr->scale()));
-#endif // !AARCH64
 
   } else if (dest->is_single_cpu()) {
     assert(left->is_single_cpu(), "unexpected left operand");
-#ifdef AARCH64
-    assert(dest->type() == T_INT, "unexpected dest type");
-    assert(left->type() == T_INT, "unexpected left type");
-    assert(right->type() == T_INT, "unexpected right type");
-#endif // AARCH64
 
     const Register res = dest->as_register();
     const Register lreg = left->as_register();
@@ -2051,36 +1600,6 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
     }
 
   } else if (dest->is_double_cpu()) {
-#ifdef AARCH64
-    assert(left->is_double_cpu() ||
-           (left->is_single_cpu() && ((left->type() == T_OBJECT) || (left->type() == T_ARRAY) || (left->type() == T_ADDRESS))),
-           "unexpected left operand");
-
-    const Register res = dest->as_register_lo();
-    const Register lreg = left->as_pointer_register();
-
-    if (right->is_constant()) {
-      assert(right->type() == T_LONG, "unexpected right type");
-      assert((right->as_constant_ptr()->as_jlong() >> 24) == 0, "out of range");
-      jint imm = (jint)right->as_constant_ptr()->as_jlong();
-      switch (code) {
-        case lir_add: __ add(res, lreg, imm); break;
-        case lir_sub: __ sub(res, lreg, imm); break;
-        default: ShouldNotReachHere();
-      }
-    } else {
-      assert(right->is_double_cpu() ||
-             (right->is_single_cpu() && ((right->type() == T_OBJECT) || (right->type() == T_ARRAY) || (right->type() == T_ADDRESS))),
-             "unexpected right operand");
-      const Register rreg = right->as_pointer_register();
-      switch (code) {
-        case lir_add: __ add(res, lreg, rreg); break;
-        case lir_sub: __ sub(res, lreg, rreg); break;
-        case lir_mul: __ mul(res, lreg, rreg); break;
-        default: ShouldNotReachHere();
-      }
-    }
-#else // AARCH64
     Register res_lo = dest->as_register_lo();
     Register res_hi = dest->as_register_hi();
     Register lreg_lo = left->as_register_lo();
@@ -2124,7 +1643,6 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
       }
     }
     move_regs(res_lo, dest->as_register_lo());
-#endif // AARCH64
 
   } else if (dest->is_single_fpu()) {
     assert(left->is_single_fpu(), "must be");
@@ -2181,11 +1699,6 @@ void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
   assert(left->is_register(), "wrong items state");
 
   if (dest->is_single_cpu()) {
-#ifdef AARCH64
-    assert (dest->type() == T_INT, "unexpected result type");
-    assert (left->type() == T_INT, "unexpected left type");
-    assert (right->type() == T_INT, "unexpected right type");
-#endif // AARCH64
 
     const Register res = dest->as_register();
     const Register lreg = left->as_register();
@@ -2215,10 +1728,6 @@ void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
     assert(dest->is_double_cpu(), "should be");
     Register res_lo = dest->as_register_lo();
 
-#ifdef AARCH64
-    assert ((left->is_single_cpu() && left->is_oop_register()) || left->is_double_cpu(), "should be");
-    const Register lreg_lo = left->as_pointer_register();
-#else
     assert (dest->type() == T_LONG, "unexpected result type");
     assert (left->type() == T_LONG, "unexpected left type");
     assert (right->type() == T_LONG, "unexpected right type");
@@ -2226,19 +1735,8 @@ void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
     const Register res_hi = dest->as_register_hi();
     const Register lreg_lo = left->as_register_lo();
     const Register lreg_hi = left->as_register_hi();
-#endif // AARCH64
 
     if (right->is_register()) {
-#ifdef AARCH64
-      assert ((right->is_single_cpu() && right->is_oop_register()) || right->is_double_cpu(), "should be");
-      const Register rreg_lo = right->as_pointer_register();
-      switch (code) {
-        case lir_logic_and: __ andr(res_lo, lreg_lo, rreg_lo); break;
-        case lir_logic_or:  __ orr (res_lo, lreg_lo, rreg_lo); break;
-        case lir_logic_xor: __ eor (res_lo, lreg_lo, rreg_lo); break;
-        default: ShouldNotReachHere();
-      }
-#else
       const Register rreg_lo = right->as_register_lo();
       const Register rreg_hi = right->as_register_hi();
       if (res_lo == lreg_hi || res_lo == rreg_hi) {
@@ -2261,23 +1759,8 @@ void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
           ShouldNotReachHere();
       }
       move_regs(res_lo, dest->as_register_lo());
-#endif // AARCH64
     } else {
       assert(right->is_constant(), "must be");
-#ifdef AARCH64
-      const julong c = (julong)right->as_constant_ptr()->as_jlong();
-      Assembler::LogicalImmediate imm(c, false);
-      if (imm.is_encoded()) {
-        switch (code) {
-          case lir_logic_and: __ andr(res_lo, lreg_lo, imm); break;
-          case lir_logic_or:  __ orr (res_lo, lreg_lo, imm); break;
-          case lir_logic_xor: __ eor (res_lo, lreg_lo, imm); break;
-          default: ShouldNotReachHere();
-        }
-      } else {
-        BAILOUT("64 bit constant cannot be inlined");
-      }
-#else
       const jint c_lo = (jint) right->as_constant_ptr()->as_jlong();
       const jint c_hi = (jint) (right->as_constant_ptr()->as_jlong() >> 32);
       // Case for logic_or from do_ClassIDIntrinsic()
@@ -2312,36 +1795,11 @@ void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
       } else {
         BAILOUT("64 bit constant cannot be inlined");
       }
-#endif // AARCH64
     }
   }
 }
 
 
-#ifdef AARCH64
-
-void LIR_Assembler::long_compare_helper(LIR_Opr opr1, LIR_Opr opr2) {
-  assert(opr1->is_double_cpu(), "should be");
-  Register x = opr1->as_register_lo();
-
-  if (opr2->is_double_cpu()) {
-    Register y = opr2->as_register_lo();
-    __ cmp(x, y);
-
-  } else {
-    assert(opr2->is_constant(), "should be");
-    assert(opr2->as_constant_ptr()->type() == T_LONG, "long constant expected");
-    jlong c = opr2->as_jlong();
-    assert(((c >> 31) == 0) || ((c >> 31) == -1), "immediate is out of range");
-    if (c >= 0) {
-      __ cmp(x, (jint)c);
-    } else {
-      __ cmn(x, (jint)(-c));
-    }
-  }
-}
-
-#endif // AARCH64
 
 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
   if (opr1->is_single_cpu()) {
@@ -2384,9 +1842,6 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
       ShouldNotReachHere();
     }
   } else if (opr1->is_double_cpu()) {
-#ifdef AARCH64
-    long_compare_helper(opr1, opr2);
-#else
     Register xlo = opr1->as_register_lo();
     Register xhi = opr1->as_register_hi();
     if (opr2->is_constant() && opr2->as_jlong() == 0) {
@@ -2405,7 +1860,6 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
     } else {
       ShouldNotReachHere();
     }
-#endif // AARCH64
   } else if (opr1->is_single_fpu()) {
     if (opr2->is_constant()) {
       assert(opr2->as_jfloat() == 0.0f, "cannot handle otherwise");
@@ -2429,15 +1883,6 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op
   const Register res = dst->as_register();
   if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
     comp_op(lir_cond_unknown, left, right, op);
-#ifdef AARCH64
-    if (code == lir_ucmp_fd2i) {         // unordered is less
-      __ cset(res, gt);                  // 1 if '>', else 0
-      __ csinv(res, res, ZR, ge);        // previous value if '>=', else -1
-    } else {
-      __ cset(res, hi);                  // 1 if '>' or unordered, else 0
-      __ csinv(res, res, ZR, pl);        // previous value if '>=' or unordered, else -1
-    }
-#else
     __ fmstat();
     if (code == lir_ucmp_fd2i) {  // unordered is less
       __ mvn(res, 0, lt);
@@ -2447,17 +1892,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op
       __ mvn(res, 0, cc);
     }
     __ mov(res, 0, eq);
-#endif // AARCH64
 
   } else {
     assert(code == lir_cmp_l2i, "must be");
 
-#ifdef AARCH64
-    long_compare_helper(left, right);
-
-    __ cset(res, gt);            // 1 if '>', else 0
-    __ csinv(res, res, ZR, ge);  // previous value if '>=', else -1
-#else
     Label done;
     const Register xlo = left->as_register_lo();
     const Register xhi = left->as_register_hi();
@@ -2471,7 +1909,6 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op
     __ mov(res, 1, hi);
     __ mvn(res, 0, lo);
     __ bind(done);
-#endif // AARCH64
   }
 }
 
@@ -2492,19 +1929,15 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall *op) {
   bool near_range = __ cache_fully_reachable();
   address oop_address = pc();
 
-  bool use_movw = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw());
+  bool use_movw = VM_Version::supports_movw();
 
   // Ricklass may contain something that is not a metadata pointer so
   // mov_metadata can't be used
   InlinedAddress value((address)Universe::non_oop_word());
   InlinedAddress addr(op->addr());
   if (use_movw) {
-#ifdef AARCH64
-    ShouldNotReachHere();
-#else
     __ movw(Ricklass, ((unsigned int)Universe::non_oop_word()) & 0xffff);
     __ movt(Ricklass, ((unsigned int)Universe::non_oop_word()) >> 16);
-#endif // AARCH64
   } else {
     // No movw/movt, must be load a pc relative value but no
     // relocation so no metadata table to load from.
@@ -2596,35 +2029,6 @@ void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
 }
 
 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
-#ifdef AARCH64
-  if (dest->is_single_cpu()) {
-    Register res = dest->as_register();
-    Register x = left->as_register();
-    Register y = count->as_register();
-    assert (dest->type() == T_INT, "unexpected result type");
-    assert (left->type() == T_INT, "unexpected left type");
-
-    switch (code) {
-      case lir_shl:  __ lslv_w(res, x, y); break;
-      case lir_shr:  __ asrv_w(res, x, y); break;
-      case lir_ushr: __ lsrv_w(res, x, y); break;
-      default: ShouldNotReachHere();
-    }
-  } else if (dest->is_double_cpu()) {
-    Register res = dest->as_register_lo();
-    Register x = left->as_register_lo();
-    Register y = count->as_register();
-
-    switch (code) {
-      case lir_shl:  __ lslv(res, x, y); break;
-      case lir_shr:  __ asrv(res, x, y); break;
-      case lir_ushr: __ lsrv(res, x, y); break;
-      default: ShouldNotReachHere();
-    }
-  } else {
-    ShouldNotReachHere();
-  }
-#else
   AsmShift shift = lsl;
   switch (code) {
     case lir_shl:  shift = lsl; break;
@@ -2659,43 +2063,10 @@ void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr
   } else {
     ShouldNotReachHere();
   }
-#endif // AARCH64
 }
 
 
 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
-#ifdef AARCH64
-  if (dest->is_single_cpu()) {
-    assert (dest->type() == T_INT, "unexpected result type");
-    assert (left->type() == T_INT, "unexpected left type");
-    count &= 31;
-    if (count != 0) {
-      switch (code) {
-        case lir_shl:  __ _lsl_w(dest->as_register(), left->as_register(), count); break;
-        case lir_shr:  __ _asr_w(dest->as_register(), left->as_register(), count); break;
-        case lir_ushr: __ _lsr_w(dest->as_register(), left->as_register(), count); break;
-        default: ShouldNotReachHere();
-      }
-    } else {
-      move_regs(left->as_register(), dest->as_register());
-    }
-  } else if (dest->is_double_cpu()) {
-    count &= 63;
-    if (count != 0) {
-      switch (code) {
-        case lir_shl:  __ _lsl(dest->as_register_lo(), left->as_register_lo(), count); break;
-        case lir_shr:  __ _asr(dest->as_register_lo(), left->as_register_lo(), count); break;
-        case lir_ushr: __ _lsr(dest->as_register_lo(), left->as_register_lo(), count); break;
-        default: ShouldNotReachHere();
-      }
-    } else {
-      move_regs(left->as_register_lo(), dest->as_register_lo());
-    }
-  } else {
-    ShouldNotReachHere();
-  }
-
-#else
   AsmShift shift = lsl;
   switch (code) {
     case lir_shl:  shift = lsl; break;
@@ -2734,29 +2105,18 @@ void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr de
   } else {
     ShouldNotReachHere();
   }
-#endif // AARCH64
 }
 
 
 // Saves 4 given registers in reserved argument area.
 void LIR_Assembler::save_in_reserved_area(Register r1, Register r2, Register r3, Register r4) {
   verify_reserved_argument_area_size(4);
-#ifdef AARCH64
-  __ stp(r1, r2, Address(SP, 0));
-  __ stp(r3, r4, Address(SP, 2*wordSize));
-#else
   __ stmia(SP, RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3) | RegisterSet(r4));
-#endif // AARCH64
 }
 
 // Restores 4 given registers from reserved argument area.
 void LIR_Assembler::restore_from_reserved_area(Register r1, Register r2, Register r3, Register r4) {
-#ifdef AARCH64
-  __ ldp(r1, r2, Address(SP, 0));
-  __ ldp(r3, r4, Address(SP, 2*wordSize));
-#else
   __ ldmia(SP, RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3) | RegisterSet(r4), no_writeback);
-#endif // AARCH64
 }
 
 
@@ -2771,9 +2131,6 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
   Register tmp2 = Rtemp;
 
   assert(src == R0 && src_pos == R1 && dst == R2 && dst_pos == R3, "code assumption");
-#ifdef AARCH64
-  assert(length == R4, "code assumption");
-#endif // AARCH64
 
   CodeStub* stub = op->stub();
 
@@ -2787,13 +2144,8 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
     // save arguments, because they will be killed by a runtime call
     save_in_reserved_area(R0, R1, R2, R3);
 
-#ifdef AARCH64
-    // save length argument, will be killed by a runtime call
-    __ raw_push(length, ZR);
-#else
     // pass length argument on SP[0]
     __ str(length, Address(SP, -2*wordSize, pre_indexed));  // 2 words for a proper stack alignment
-#endif // AARCH64
 
     address copyfunc_addr = StubRoutines::generic_arraycopy();
     assert(copyfunc_addr != NULL, "generic arraycopy stub required");
@@ -2805,11 +2157,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
     // the stub is in the code cache so close enough
     __ call(copyfunc_addr, relocInfo::runtime_call_type);
 
-#ifdef AARCH64
-    __ raw_pop(length, ZR);
-#else
     __ add(SP, SP, 2*wordSize);
-#endif // AARCH64
 
     __ cbz_32(R0, *stub->continuation());
 
@@ -2983,7 +2331,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
         Register dst_ptr = R1;
         Register len     = R2;
         Register chk_off = R3;
-        Register super_k = AARCH64_ONLY(R4) NOT_AARCH64(tmp);
+        Register super_k = tmp;
 
         __ add(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type));
         __ add_ptr_scaled_int32(src_ptr, src_ptr, src_pos, shift);
@@ -2995,20 +2343,11 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
         int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
         int sco_offset = in_bytes(Klass::super_check_offset_offset());
 
-#ifdef AARCH64
-        __ raw_push(length, ZR); // Preserve length around *copyfunc_addr call
-
-        __ mov(len, length);
-        __ ldr(super_k, Address(tmp, ek_offset)); // super_k == R4 == length, so this load cannot be performed earlier
-        // TODO-AARCH64: check whether it is faster to load super klass early by using tmp and additional mov.
-        __ ldr_u32(chk_off, Address(super_k, sco_offset));
-#else // AARCH64
         __ ldr(super_k, Address(tmp, ek_offset));
 
         __ mov(len, length);
         __ ldr_u32(chk_off, Address(super_k, sco_offset));
         __ push(super_k);
-#endif // AARCH64
 
         __ call(copyfunc_addr, relocInfo::runtime_call_type);
 
@@ -3021,11 +2360,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
         }
 #endif // PRODUCT
 
-#ifdef AARCH64
-        __ raw_pop(length, ZR);
-#else
         __ add(SP, SP, wordSize);  // Drop super_k argument
-#endif // AARCH64
 
         __ cbz_32(R0, *stub->continuation());
         __ mvn_32(tmp, R0);
@@ -3087,9 +2422,6 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
 void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
   assert(op->code() == lir_assert, "must be");
 
-#ifdef AARCH64
-  __ NOT_IMPLEMENTED();
-#else
   if (op->in_opr1()->is_valid()) {
     assert(op->in_opr2()->is_valid(), "both operands must be valid");
     comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op);
@@ -3121,7 +2453,6 @@ void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
     breakpoint();
   }
   __ bind(ok);
-#endif // AARCH64
 }
 #endif // ASSERT
 
@@ -3170,7 +2501,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
   assert_different_registers(mdo, tmp1);
   __ mov_metadata(mdo, md->constant_encoding());
   int mdo_offset_bias = 0;
-  int max_offset = AARCH64_ONLY(4096 << LogBytesPerWord) NOT_AARCH64(4096);
+  int max_offset = 4096;
   if (md->byte_offset_of_slot(data, CounterData::count_offset()) + data->size_in_bytes() >= max_offset) {
     // The offset is large so bias the mdo by the base of the slot so
     // that the ldr can use an immediate offset to reference the slots of the data
@@ -3266,7 +2597,6 @@ void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
 
 
 void LIR_Assembler::align_backward_branch_target() {
-  // TODO-AARCH64 review it
   // Some ARM processors do better with 8-byte branch target alignment
   __ align(8);
 }
@@ -3281,9 +2611,6 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
     assert (left->type() == T_INT, "unexpected left type");
     __ neg_32(dest->as_register(), left->as_register());
   } else if (left->is_double_cpu()) {
-#ifdef AARCH64
-    __ neg(dest->as_register_lo(), left->as_register_lo());
-#else
     Register dest_lo = dest->as_register_lo();
     Register dest_hi = dest->as_register_hi();
     Register src_lo = left->as_register_lo();
@@ -3294,7 +2621,6 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
     __ rsbs(dest_lo, src_lo, 0);
     __ rsc(dest_hi, src_hi, 0);
     move_regs(dest_lo, dest->as_register_lo());
-#endif // AARCH64
   } else if (left->is_single_fpu()) {
     __ neg_float(dest->as_float_reg(), left->as_float_reg());
   } else if (left->is_double_fpu()) {
@@ -3316,9 +2642,6 @@ void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest, LIR_PatchCode patch_cod
     __ add(dest->as_pointer_register(), addr->base()->as_pointer_register(), c);
   } else {
     assert(addr->disp() == 0, "cannot handle otherwise");
-#ifdef AARCH64
-    assert(addr->index()->is_double_cpu(), "should be");
-#endif // AARCH64
     __ add(dest->as_pointer_register(), addr->base()->as_pointer_register(),
            AsmOperand(addr->index()->as_pointer_register(), lsl, addr->scale()));
   }
@@ -3335,9 +2658,6 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg
 
 
 void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
-#ifdef AARCH64
-  Unimplemented(); // TODO-AARCH64: Use stlr/ldar instructions for volatile load/store
-#else
   assert(src->is_double_cpu() && dest->is_address() ||
          src->is_address() && dest->is_double_cpu(),
          "Simple move_op is called for all other cases");
@@ -3379,7 +2699,6 @@ void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type,
   if (info != NULL) {
     add_debug_info_for_null_check(null_check_offset, info);
   }
-#endif // AARCH64
 }
 
 
@@ -3421,9 +2740,6 @@ void LIR_Assembler::get_thread(LIR_Opr result_reg) {
 }
 
 void LIR_Assembler::peephole(LIR_List* lir) {
-#ifdef AARCH64
-  return; // TODO-AARCH64 implement peephole optimizations
-#endif
   LIR_OpList* inst = lir->instructions_list();
   const int inst_length = inst->length();
   for (int i = 0; i < inst_length; i++) {
@@ -3487,38 +2803,23 @@ void LIR_Assembler::peephole(LIR_List* lir) {
 }
 
 void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
-#ifdef AARCH64
-  Register ptr = src->as_pointer_register();
-#else
   assert(src->is_address(), "sanity");
   Address addr = as_Address(src->as_address_ptr());
-#endif
 
   if (code == lir_xchg) {
-#ifdef AARCH64
-    if (UseCompressedOops && data->is_oop()) {
-      __ encode_heap_oop(tmp->as_pointer_register(), data->as_register());
-    }
-#endif // AARCH64
   } else {
     assert (!data->is_oop(), "xadd for oops");
   }
 
-#ifndef AARCH64
   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp);
-#endif // !AARCH64
 
   Label retry;
   __ bind(retry);
 
-  if ((data->type() == T_INT) || (data->is_oop() AARCH64_ONLY(&& UseCompressedOops))) {
+  if (data->type() == T_INT || data->is_oop()) {
     Register dst = dest->as_register();
     Register new_val = noreg;
-#ifdef AARCH64
-    __ ldaxr_w(dst, ptr);
-#else
     __ ldrex(dst, addr);
-#endif
     if (code == lir_xadd) {
       Register tmp_reg = tmp->as_register();
       if (data->is_constant()) {
@@ -3537,35 +2838,8 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
       }
       assert_different_registers(dst, new_val);
     }
-#ifdef AARCH64
-    __ stlxr_w(Rtemp, new_val, ptr);
-#else
     __ strex(Rtemp, new_val, addr);
-#endif // AARCH64
 
-#ifdef AARCH64
-  } else if ((data->type() == T_LONG) || (data->is_oop() && !UseCompressedOops)) {
-    Register dst = dest->as_pointer_register();
-    Register new_val = noreg;
-    __ ldaxr(dst, ptr);
-    if (code == lir_xadd) {
-      Register tmp_reg = tmp->as_pointer_register();
-      if (data->is_constant()) {
-        assert_different_registers(dst, ptr, tmp_reg);
-        jlong c = data->as_constant_ptr()->as_jlong();
-        assert((jlong)((jint)c) == c, "overflow");
-        __ add(tmp_reg, dst, (jint)c);
-      } else {
-        assert_different_registers(dst, ptr, tmp_reg, data->as_pointer_register());
-        __ add(tmp_reg, dst, data->as_pointer_register());
-      }
-      new_val = tmp_reg;
-    } else {
-      new_val = data->as_pointer_register();
-      assert_different_registers(dst, ptr, new_val);
-    }
-    __ stlxr(Rtemp, new_val, ptr);
-#else
   } else if (data->type() == T_LONG) {
     Register dst_lo = dest->as_register_lo();
     Register new_val_lo = noreg;
@@ -3606,7 +2880,6 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
       assert((new_val_lo->encoding() & 0x1) == 0, "misaligned register pair");
     }
     __ strexd(Rtemp, new_val_lo, addr);
-#endif // AARCH64
   } else {
     ShouldNotReachHere();
   }
@@ -3614,11 +2887,6 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
   __ cbnz_32(Rtemp, retry);
   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
 
-#ifdef AARCH64
-  if (UseCompressedOops && data->is_oop()) {
-    __ decode_heap_oop(dest->as_register());
-  }
-#endif // AARCH64
 }
 
 #undef __
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp
index 83abf67743e..98cf334f1dc 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp
@@ -44,9 +44,6 @@
                                  Label* profile_cast_success, Label* profile_cast_failure,
                                  Label* success, Label* failure);
 
-#ifdef AARCH64
-  void long_compare_helper(LIR_Opr opr1, LIR_Opr opr2);
-#endif // AARCH64
 
   // Saves 4 given registers in reserved argument area.
   void save_in_reserved_area(Register r1, Register r2, Register r3, Register r4);
@@ -55,10 +52,10 @@
   void restore_from_reserved_area(Register r1, Register r2, Register r3, Register r4);
 
   enum {
-    _call_stub_size = AARCH64_ONLY(32) NOT_AARCH64(16),
+    _call_stub_size = 16,
     _call_aot_stub_size = 0,
-    _exception_handler_size = PRODUCT_ONLY(AARCH64_ONLY(256) NOT_AARCH64(68)) NOT_PRODUCT(AARCH64_ONLY(256+216) NOT_AARCH64(68+60)),
-    _deopt_handler_size = AARCH64_ONLY(32) NOT_AARCH64(16)
+    _exception_handler_size = PRODUCT_ONLY(68) NOT_PRODUCT(68+60),
+    _deopt_handler_size = 16
   };
 
  public:
diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp
index b05fc876f27..5d465f2c6ed 100644
--- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp
@@ -118,19 +118,6 @@ LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
 
 
 bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
-#ifdef AARCH64
-  if (v->type()->as_IntConstant() != NULL) {
-    return v->type()->as_IntConstant()->value() == 0;
-  } else if (v->type()->as_LongConstant() != NULL) {
-    return v->type()->as_LongConstant()->value() == 0;
-  } else if (v->type()->as_ObjectConstant() != NULL) {
-    return v->type()->as_ObjectConstant()->value()->is_null_object();
-  } else if (v->type()->as_FloatConstant() != NULL) {
-    return jint_cast(v->type()->as_FloatConstant()->value()) == 0;
-  } else if (v->type()->as_DoubleConstant() != NULL) {
-    return jlong_cast(v->type()->as_DoubleConstant()->value()) == 0;
-  }
-#endif // AARCH64
   return false;
 }
 
@@ -140,15 +127,10 @@ bool LIRGenerator::can_inline_as_constant(Value v) const {
     return Assembler::is_arith_imm_in_range(v->type()->as_IntConstant()->value());
   } else if (v->type()->as_ObjectConstant() != NULL) {
     return v->type()->as_ObjectConstant()->value()->is_null_object();
-#ifdef AARCH64
-  } else if (v->type()->as_LongConstant() != NULL) {
-    return Assembler::is_arith_imm_in_range(v->type()->as_LongConstant()->value());
-#else
   } else if (v->type()->as_FloatConstant() != NULL) {
     return v->type()->as_FloatConstant()->value() == 0.0f;
   } else if (v->type()->as_DoubleConstant() != NULL) {
     return v->type()->as_DoubleConstant()->value() == 0.0;
-#endif // AARCH64
   }
   return false;
 }
@@ -160,39 +142,6 @@ bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
 }
 
 
-#ifdef AARCH64
-
-static bool can_inline_as_constant_in_cmp(Value v) {
-  jlong constant;
-  if (v->type()->as_IntConstant() != NULL) {
-    constant = v->type()->as_IntConstant()->value();
-  } else if (v->type()->as_LongConstant() != NULL) {
-    constant = v->type()->as_LongConstant()->value();
-  } else if (v->type()->as_ObjectConstant() != NULL) {
-    return v->type()->as_ObjectConstant()->value()->is_null_object();
-  } else if (v->type()->as_FloatConstant() != NULL) {
-    return v->type()->as_FloatConstant()->value() == 0.0f;
-  } else if (v->type()->as_DoubleConstant() != NULL) {
-    return v->type()->as_DoubleConstant()->value() == 0.0;
-  } else {
-    return false;
-  }
-
-  return Assembler::is_arith_imm_in_range(constant) || Assembler::is_arith_imm_in_range(-constant);
-}
-
-
-static bool can_inline_as_constant_in_logic(Value v) {
-  if (v->type()->as_IntConstant() != NULL) {
-    return Assembler::LogicalImmediate(v->type()->as_IntConstant()->value(), true).is_encoded();
-  } else if (v->type()->as_LongConstant() != NULL) {
-    return Assembler::LogicalImmediate(v->type()->as_LongConstant()->value(), false).is_encoded();
-  }
-  return false;
-}
-
-
-#endif // AARCH64
 
 
 LIR_Opr LIRGenerator::safepoint_poll_register() {
@@ -211,48 +160,10 @@ static LIR_Opr make_constant(BasicType type, jlong c) {
   }
 }
 
-#ifdef AARCH64
-
-void LIRGenerator::add_constant(LIR_Opr src, jlong c, LIR_Opr dest) {
-  if (c == 0) {
-    __ move(src, dest);
-    return;
-  }
-
-  BasicType type = src->type();
-  bool is_neg = (c < 0);
-  c = ABS(c);
-
-  if ((c >> 24) == 0) {
-    for (int shift = 0; shift <= 12; shift += 12) {
-      int part = ((int)c) & (right_n_bits(12) << shift);
-      if (part != 0) {
-        if (is_neg) {
-          __ sub(src, make_constant(type, part), dest);
-        } else {
-          __ add(src, make_constant(type, part), dest);
-        }
-        src = dest;
-      }
-    }
-  } else {
-    __ move(make_constant(type, c), dest);
-    if (is_neg) {
-      __ sub(src, dest, dest);
-    } else {
-      __ add(src, dest, dest);
-    }
-  }
-}
-
-#endif // AARCH64
 
 
 void LIRGenerator::add_large_constant(LIR_Opr src, int c, LIR_Opr dest) {
   assert(c != 0, "must be");
-#ifdef AARCH64
-  add_constant(src, c, dest);
-#else
   // Find first non-zero bit
   int shift = 0;
   while ((c & (3 << shift)) == 0) {
@@ -272,7 +183,6 @@ void LIRGenerator::add_large_constant(LIR_Opr src, int c, LIR_Opr dest) {
   if (c & (mask << 24)) {
     __ add(dest, LIR_OprFact::intConst(c & (mask << 24)), dest);
   }
-#endif // AARCH64
 }
 
 static LIR_Address* make_address(LIR_Opr base, LIR_Opr index, LIR_Address::Scale scale, BasicType type) {
@@ -288,7 +198,6 @@ LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
     index = LIR_OprFact::illegalOpr;
   }
 
-#ifndef AARCH64
   if (base->type() == T_LONG) {
     LIR_Opr tmp = new_register(T_INT);
     __ convert(Bytecodes::_l2i, base, tmp);
@@ -302,26 +211,11 @@ LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
   // At this point base and index should be all ints and not constants
   assert(base->is_single_cpu() && !base->is_constant(), "base should be an non-constant int");
   assert(index->is_illegal() || (index->type() == T_INT && !index->is_constant()), "index should be an non-constant int");
-#endif
 
   int max_disp;
   bool disp_is_in_range;
   bool embedded_shift;
 
-#ifdef AARCH64
-  int align = exact_log2(type2aelembytes(type, true));
-  assert((disp & right_n_bits(align)) == 0, "displacement is not aligned");
-  assert(shift == 0 || shift == align, "shift should be zero or equal to embedded align");
-  max_disp = (1 << 12) << align;
-
-  if (disp >= 0) {
-    disp_is_in_range = Assembler::is_unsigned_imm_in_range(disp, 12, align);
-  } else {
-    disp_is_in_range = Assembler::is_imm_in_range(disp, 9, 0);
-  }
-
-  embedded_shift = true;
-#else
   switch (type) {
     case T_BYTE:
     case T_SHORT:
@@ -344,7 +238,6 @@ LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
   }
 
   disp_is_in_range = (-max_disp < disp && disp < max_disp);
-#endif // !AARCH64
 
   if (index->is_register()) {
     LIR_Opr tmp = new_pointer_register();
@@ -394,11 +287,7 @@ LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_o
 LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
   assert(type == T_LONG || type == T_INT, "should be");
   LIR_Opr r = make_constant(type, x);
-#ifdef AARCH64
-  bool imm_in_range = Assembler::LogicalImmediate(x, type == T_INT).is_encoded();
-#else
   bool imm_in_range = AsmOperand::is_rotated_imm(x);
-#endif // AARCH64
   if (!imm_in_range) {
     LIR_Opr tmp = new_register(type);
     __ move(r, tmp);
@@ -439,14 +328,9 @@ void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr bas
 bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) {
   assert(left != result, "should be different registers");
   if (is_power_of_2(c + 1)) {
-#ifdef AARCH64
-    __ shift_left(left, log2_intptr(c + 1), result);
-    __ sub(result, left, result);
-#else
     LIR_Address::Scale scale = (LIR_Address::Scale) log2_intptr(c + 1);
     LIR_Address* addr = new LIR_Address(left, left, scale, 0, T_INT);
     __ sub(LIR_OprFact::address(addr), left, result); // rsb with shifted register
-#endif // AARCH64
     return true;
   } else if (is_power_of_2(c - 1)) {
     LIR_Address::Scale scale = (LIR_Address::Scale) log2_intptr(c - 1);
@@ -465,12 +349,7 @@ void LIRGenerator::store_stack_parameter(LIR_Opr item, ByteSize offset_from_sp)
 
 void LIRGenerator::set_card(LIR_Opr value, LIR_Address* card_addr) {
   assert(CardTable::dirty_card_val() == 0,
-    "Cannot use ZR register (aarch64) or the register containing the card table base address directly (aarch32) otherwise");
-#ifdef AARCH64
-  // AARCH64 has a register that is constant zero. We can use that one to set the
-  // value in the card table to dirty.
-  __ move(FrameMap::ZR_opr, card_addr);
-#else // AARCH64
+    "Cannot use the register containing the card table base address directly");
   if((ci_card_table_address_as<intx>() & 0xff) == 0) {
     // If the card table base address is aligned to 256 bytes, we can use the register
     // that contains the card_table_base_address.
@@ -481,7 +360,6 @@ void LIRGenerator::set_card(LIR_Opr value, LIR_Address* card_addr) {
     __ move(LIR_OprFact::intConst(CardTable::dirty_card_val()), tmp_zero);
     __ move(tmp_zero, card_addr);
   }
-#endif // AARCH64
 }
 
 void LIRGenerator::CardTableBarrierSet_post_barrier_helper(LIR_OprDesc* addr, LIR_Const* card_table_base) {
@@ -492,24 +370,16 @@ void LIRGenerator::CardTableBarrierSet_post_barrier_helper(LIR_OprDesc* addr, LI
 
   LIR_Opr tmp = FrameMap::LR_ptr_opr;
 
-  // TODO-AARCH64: check performance
-  bool load_card_table_base_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw());
+  bool load_card_table_base_const = VM_Version::supports_movw();
   if (load_card_table_base_const) {
     __ move((LIR_Opr)card_table_base, tmp);
   } else {
     __ move(new LIR_Address(FrameMap::Rthread_opr, in_bytes(JavaThread::card_table_base_offset()), T_ADDRESS), tmp);
   }
 
-#ifdef AARCH64
-  LIR_Address* shifted_reg_operand = new LIR_Address(tmp, addr, (LIR_Address::Scale) -CardTable::card_shift, 0, T_BYTE);
-  LIR_Opr tmp2 = tmp;
-  __ add(tmp, LIR_OprFact::address(shifted_reg_operand), tmp2); // tmp2 = tmp + (addr >> CardTable::card_shift)
-  LIR_Address* card_addr = new LIR_Address(tmp2, T_BYTE);
-#else
   // Use unsigned type T_BOOLEAN here rather than (signed) T_BYTE since signed load
   // byte instruction does not support the addressing mode we need.
   LIR_Address* card_addr = new LIR_Address(tmp, addr, (LIR_Address::Scale) -CardTable::card_shift, 0, T_BOOLEAN);
-#endif
   if (UseCondCardMark) {
     if (ct->scanned_concurrently()) {
       __ membar_storeload();
@@ -679,63 +549,6 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
     info = state_for(x);
   }
 
-#ifdef AARCH64
-  LIRItem left(x->x(), this);
-  LIRItem right(x->y(), this);
-  LIRItem* left_arg = &left;
-  LIRItem* right_arg = &right;
-
-  // Test if instr is commutative and if we should swap
-  if (x->is_commutative() && left.is_constant()) {
-    left_arg = &right;
-    right_arg = &left;
-  }
-
-  left_arg->load_item();
-  switch (x->op()) {
-    case Bytecodes::_ldiv:
-      right_arg->load_item();
-      make_div_by_zero_check(right_arg->result(), T_LONG, info);
-      __ idiv(left_arg->result(), right_arg->result(), rlock_result(x), LIR_OprFact::illegalOpr, NULL);
-      break;
-
-    case Bytecodes::_lrem: {
-      right_arg->load_item();
-      make_div_by_zero_check(right_arg->result(), T_LONG, info);
-      // a % b is implemented with 2 instructions:
-      // tmp = a/b       (sdiv)
-      // res = a - b*tmp (msub)
-      LIR_Opr tmp = FrameMap::as_long_opr(Rtemp);
-      __ irem(left_arg->result(), right_arg->result(), rlock_result(x), tmp, NULL);
-      break;
-    }
-
-    case Bytecodes::_lmul:
-      if (right_arg->is_constant() && is_power_of_2_long(right_arg->get_jlong_constant())) {
-        right_arg->dont_load_item();
-        __ shift_left(left_arg->result(), exact_log2_long(right_arg->get_jlong_constant()), rlock_result(x));
-      } else {
-        right_arg->load_item();
-        __ mul(left_arg->result(), right_arg->result(), rlock_result(x));
-      }
-      break;
-
-    case Bytecodes::_ladd:
-    case Bytecodes::_lsub:
-      if (right_arg->is_constant()) {
-        jlong c = right_arg->get_jlong_constant();
-        add_constant(left_arg->result(), (x->op() == Bytecodes::_ladd) ? c : -c, rlock_result(x));
-      } else {
-        right_arg->load_item();
-        arithmetic_op_long(x->op(), rlock_result(x), left_arg->result(), right_arg->result(), NULL);
-      }
-      break;
-
-    default:
-      ShouldNotReachHere();
-      return;
-  }
-#else
   switch (x->op()) {
     case Bytecodes::_ldiv:
     case Bytecodes::_lrem: {
@@ -777,7 +590,6 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
     default:
       ShouldNotReachHere();
   }
-#endif // AARCH64
 }
 
 
@@ -804,20 +616,6 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
       LIR_Opr result = rlock_result(x);
       __ idiv(left_arg->result(), right_arg->result(), result, tmp, info);
     } else {
-#ifdef AARCH64
-      left_arg->load_item();
-      right_arg->load_item();
-      make_div_by_zero_check(right_arg->result(), T_INT, info);
-      if (x->op() == Bytecodes::_idiv) {
-        __ idiv(left_arg->result(), right_arg->result(), rlock_result(x), LIR_OprFact::illegalOpr, NULL);
-      } else {
-        // a % b is implemented with 2 instructions:
-        // tmp = a/b       (sdiv)
-        // res = a - b*tmp (msub)
-        LIR_Opr tmp = FrameMap::as_opr(Rtemp);
-        __ irem(left_arg->result(), right_arg->result(), rlock_result(x), tmp, NULL);
-      }
-#else
       left_arg->load_item_force(FrameMap::R0_opr);
       right_arg->load_item_force(FrameMap::R2_opr);
       LIR_Opr tmp = FrameMap::R1_opr;
@@ -831,16 +629,8 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
         __ idiv(left_arg->result(), right_arg->result(), out_reg, tmp, info);
       }
       __ move(out_reg, result);
-#endif // AARCH64
     }
 
-#ifdef AARCH64
-  } else if (((x->op() == Bytecodes::_iadd) || (x->op() == Bytecodes::_isub)) && right_arg->is_constant()) {
-    left_arg->load_item();
-    jint c = right_arg->get_jint_constant();
-    right_arg->dont_load_item();
-    add_constant(left_arg->result(), (x->op() == Bytecodes::_iadd) ? c : -c, rlock_result(x));
-#endif // AARCH64
 
   } else {
     left_arg->load_item();
@@ -852,7 +642,6 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
         right_arg->load_item();
       }
     } else {
-      AARCH64_ONLY(assert(!right_arg->is_constant(), "constant right_arg is already handled by this moment");)
       right_arg->load_nonconstant();
     }
     rlock_result(x);
@@ -880,11 +669,9 @@ void LIRGenerator::do_ShiftOp(ShiftOp* x) {
   LIRItem value(x->x(), this);
   LIRItem count(x->y(), this);
 
-#ifndef AARCH64
   if (value.type()->is_long()) {
     count.set_destroys_register();
   }
-#endif // !AARCH64
 
   if (count.is_constant()) {
     assert(count.type()->as_IntConstant() != NULL, "should be");
@@ -906,15 +693,7 @@ void LIRGenerator::do_LogicOp(LogicOp* x) {
 
   left.load_item();
 
-#ifdef AARCH64
-  if (right.is_constant() && can_inline_as_constant_in_logic(right.value())) {
-    right.dont_load_item();
-  } else {
-    right.load_item();
-  }
-#else
   right.load_nonconstant();
-#endif // AARCH64
 
   logic_op(x->op(), rlock_result(x), left.result(), right.result());
 }
@@ -956,15 +735,7 @@ void LIRGenerator::do_CompareOp(CompareOp* x) {
   LIRItem right(x->y(), this);
   left.load_item();
 
-#ifdef AARCH64
-  if (right.is_constant() && can_inline_as_constant_in_cmp(right.value())) {
-    right.dont_load_item();
-  } else {
-    right.load_item();
-  }
-#else
   right.load_nonconstant();
-#endif // AARCH64
 
   LIR_Opr reg = rlock_result(x);
 
@@ -987,19 +758,11 @@ LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, LIRItem& cmp_
   cmp_value.load_item();
   LIR_Opr result = new_register(T_INT);
   if (type == T_OBJECT || type == T_ARRAY) {
-#ifdef AARCH64
-    if (UseCompressedOops) {
-      tmp1 = new_pointer_register();
-      tmp2 = new_pointer_register();
-    }
-#endif
     __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result);
   } else if (type == T_INT) {
     __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), tmp1, tmp1, result);
   } else if (type == T_LONG) {
-#ifndef AARCH64
     tmp1 = new_register(T_LONG);
-#endif // !AARCH64
     __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), tmp1, tmp2, result);
   } else {
     ShouldNotReachHere();
@@ -1135,7 +898,6 @@ void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
 void LIRGenerator::do_Convert(Convert* x) {
   address runtime_func;
   switch (x->op()) {
-#ifndef AARCH64
     case Bytecodes::_l2f:
       runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::l2f);
       break;
@@ -1170,7 +932,6 @@ void LIRGenerator::do_Convert(Convert* x) {
       runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::d2i);
       break;
 #endif // __SOFTFP__
-#endif // !AARCH64
     default: {
       LIRItem value(x->value(), this);
       value.load_item();
@@ -1488,7 +1249,6 @@ void LIRGenerator::do_If(If* x) {
   LIRItem* yin = &yitem;
   If::Condition cond = x->cond();
 
-#ifndef AARCH64
   if (tag == longTag) {
     if (cond == If::gtr || cond == If::leq) {
       cond = Instruction::mirror(cond);
@@ -1497,20 +1257,11 @@ void LIRGenerator::do_If(If* x) {
     }
     xin->set_destroys_register();
   }
-#endif // !AARCH64
 
   xin->load_item();
   LIR_Opr left = xin->result();
   LIR_Opr right;
 
-#ifdef AARCH64
-  if (yin->is_constant() && can_inline_as_constant_in_cmp(yin->value())) {
-    yin->dont_load_item();
-  } else {
-    yin->load_item();
-  }
-  right = yin->result();
-#else
   if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 &&
       (cond == If::eql || cond == If::neq)) {
     // inline long zero
@@ -1519,7 +1270,6 @@ void LIRGenerator::do_If(If* x) {
     yin->load_nonconstant();
     right = yin->result();
   }
-#endif // AARCH64
 
   set_no_result(x);
 
@@ -1558,7 +1308,6 @@ void LIRGenerator::trace_block_entry(BlockBegin* block) {
 
 void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
                                         CodeEmitInfo* info) {
-#ifndef AARCH64
   if (value->is_double_cpu()) {
     assert(address->index()->is_illegal(), "should have a constant displacement");
     LIR_Opr tmp = new_pointer_register();
@@ -1566,14 +1315,11 @@ void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
     __ volatile_store_mem_reg(value, new LIR_Address(tmp, (intx)0, address->type()), info);
     return;
   }
-#endif // !AARCH64
-  // TODO-AARCH64 implement with stlr instruction
   __ store(value, address, info, lir_patch_none);
 }
 
 void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
                                        CodeEmitInfo* info) {
-#ifndef AARCH64
   if (result->is_double_cpu()) {
     assert(address->index()->is_illegal(), "should have a constant displacement");
     LIR_Opr tmp = new_pointer_register();
@@ -1581,7 +1327,5 @@ void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
     __ volatile_load_mem_reg(new LIR_Address(tmp, (intx)0, address->type()), result, info);
     return;
   }
-#endif // !AARCH64
-  // TODO-AARCH64 implement with ldar instruction
   __ load(address, result, info, lir_patch_none);
 }
diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.hpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.hpp
index 24552cb1528..5bb8e473e7e 100644
--- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.hpp
+++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.hpp
@@ -27,7 +27,3 @@
 
   void make_div_by_zero_check(LIR_Opr right_arg, BasicType type, CodeEmitInfo* info);
 
-#ifdef AARCH64
-  // the helper for arithmetic
-  void add_constant(LIR_Opr src, jlong c, LIR_Opr dest);
-#endif // AARCH64
diff --git a/src/hotspot/cpu/arm/c1_LIR_arm.cpp b/src/hotspot/cpu/arm/c1_LIR_arm.cpp
index 806da320209..791c6985137 100644
--- a/src/hotspot/cpu/arm/c1_LIR_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIR_arm.cpp
@@ -33,17 +33,6 @@ FloatRegister LIR_OprDesc::as_double_reg() const {
   return as_FloatRegister(fpu_regnrLo());
 }
 
-#ifdef AARCH64
-// Reg2 unused.
-LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
-  assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform");
-  return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
-                             (reg1 << LIR_OprDesc::reg2_shift) |
-                             LIR_OprDesc::double_type          |
-                             LIR_OprDesc::fpu_register         |
-                             LIR_OprDesc::double_size);
-}
-#else
 LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
   assert(as_FloatRegister(reg2) != fnoreg, "Arm32 holds double in two regs.");
   return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
@@ -52,22 +41,12 @@ LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
                              LIR_OprDesc::fpu_register         |
                              LIR_OprDesc::double_size);
 }
-#endif
 
 #ifndef PRODUCT
 void LIR_Address::verify() const {
 #ifdef _LP64
   assert(base()->is_cpu_register(), "wrong base operand");
 #endif
-#ifdef AARCH64
-  if (base()->type() == T_INT) {
-    assert(index()->is_single_cpu() && (index()->type() == T_INT), "wrong index operand");
-  } else {
-    assert(index()->is_illegal() || index()->is_double_cpu() ||
-           (index()->is_single_cpu() && (index()->is_oop_register() || index()->type() == T_INT)), "wrong index operand");
-    assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, "wrong type for addresses");
-  }
-#else
   assert(disp() == 0 || index()->is_illegal(), "can't have both");
   // Note: offsets higher than 4096 must not be rejected here. They can
   // be handled by the back-end or will be rejected if not.
@@ -81,6 +60,5 @@ void LIR_Address::verify() const {
   assert(base()->type() == T_OBJECT || base()->type() == T_INT || base()->type() == T_METADATA,
          "wrong type for addresses");
 #endif
-#endif // AARCH64
 }
 #endif // PRODUCT
diff --git a/src/hotspot/cpu/arm/c1_LinearScan_arm.hpp b/src/hotspot/cpu/arm/c1_LinearScan_arm.hpp
index d67643c7dbf..5a5ad691b5b 100644
--- a/src/hotspot/cpu/arm/c1_LinearScan_arm.hpp
+++ b/src/hotspot/cpu/arm/c1_LinearScan_arm.hpp
@@ -31,24 +31,17 @@ inline bool LinearScan::is_processed_reg_num(int reg_num) {
 }
 
 inline int LinearScan::num_physical_regs(BasicType type) {
-#ifndef AARCH64
   if (type == T_LONG || type == T_DOUBLE) return 2;
-#endif // !AARCH64
   return 1;
 }
 
 
 inline bool LinearScan::requires_adjacent_regs(BasicType type) {
-#ifdef AARCH64
-  return false;
-#else
   return type == T_DOUBLE || type == T_LONG;
-#endif // AARCH64
 }
 
 inline bool LinearScan::is_caller_save(int assigned_reg) {
   assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
-  // TODO-AARCH64 try to add callee-saved registers
   return true;
 }
 
diff --git a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp
index facfbdd4de0..b2f8ddc4945 100644
--- a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp
@@ -46,11 +46,7 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
   load_klass(Rtemp, receiver);
   cmp(Rtemp, iCache);
   b(verified, eq); // jump over alignment no-ops
-#ifdef AARCH64
-  jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
-#else
   jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
-#endif
   align(CodeEntryAlignment);
   bind(verified);
 }
@@ -59,10 +55,6 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
   assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
   assert((frame_size_in_bytes % StackAlignmentInBytes) == 0, "frame size should be aligned");
 
-#ifdef AARCH64
-  // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
-  nop();
-#endif // AARCH64
 
   arm_stack_overflow_check(bang_size_in_bytes, Rtemp);
 
@@ -104,28 +96,12 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
     mov(tmp, (intptr_t)markOopDesc::prototype());
   }
 
-#ifdef AARCH64
-  if (UseCompressedClassPointers) {
-    str(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
-    encode_klass_not_null(tmp, klass);          // Take care not to kill klass
-    str_w(tmp, Address(obj, oopDesc::klass_offset_in_bytes()));
-  } else {
-    assert(oopDesc::mark_offset_in_bytes() + wordSize == oopDesc::klass_offset_in_bytes(), "adjust this code");
-    stp(tmp, klass, Address(obj, oopDesc::mark_offset_in_bytes()));
-  }
-#else
   str(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
   str(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
-#endif // AARCH64
 
   if (len->is_valid()) {
     str_32(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
   }
-#ifdef AARCH64
-  else if (UseCompressedClassPointers) {
-    store_klass_gap(obj);
-  }
-#endif // AARCH64
 }
 
 
@@ -146,40 +122,6 @@ void C1_MacroAssembler::initialize_object(Register obj, Register obj_end, Regist
   const Register ptr = tmp2;
 
   if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
-#ifdef AARCH64
-    if (obj_size_in_bytes < 0) {
-      add_rc(ptr, obj, header_size);
-      initialize_body(ptr, obj_end, tmp1);
-
-    } else {
-      int base = instanceOopDesc::header_size() * HeapWordSize;
-      assert(obj_size_in_bytes >= base, "should be");
-
-      const int zero_bytes = obj_size_in_bytes - base;
-      assert((zero_bytes % wordSize) == 0, "should be");
-
-      if ((zero_bytes % (2*wordSize)) != 0) {
-        str(ZR, Address(obj, base));
-        base += wordSize;
-      }
-
-      const int stp_count = zero_bytes / (2*wordSize);
-
-      if (zero_bytes > 8 * wordSize) {
-        Label loop;
-        add(ptr, obj, base);
-        mov(tmp1, stp_count);
-        bind(loop);
-        subs(tmp1, tmp1, 1);
-        stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
-        b(loop, gt);
-      } else {
-        for (int i = 0; i < stp_count; i++) {
-          stp(ZR, ZR, Address(obj, base + i * 2 * wordSize));
-        }
-      }
-    }
-#else
     if (obj_size_in_bytes >= 0 && obj_size_in_bytes <= 8 * BytesPerWord) {
       mov(tmp1, 0);
       const int base = instanceOopDesc::header_size() * HeapWordSize;
@@ -191,7 +133,6 @@ void C1_MacroAssembler::initialize_object(Register obj, Register obj_end, Regist
       add(ptr, obj, header_size);
       initialize_body(ptr, obj_end, tmp1);
     }
-#endif // AARCH64
   }
 
   // StoreStore barrier required after complete initialization
@@ -228,12 +169,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len,
   const int scale_shift = exact_log2(element_size);
   const Register obj_size = Rtemp; // Rtemp should be free at c1 LIR level
 
-#ifdef AARCH64
-  mov_slow(Rtemp, max_array_allocation_length);
-  cmp_32(len, Rtemp);
-#else
   cmp_32(len, max_array_allocation_length);
-#endif // AARCH64
   b(slow_case, hs);
 
   bool align_header = ((header_size_in_bytes | element_size) & MinObjAlignmentInBytesMask) != 0;
@@ -272,34 +208,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj,
 
   assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
 
-#ifdef AARCH64
-
-  str(obj, Address(disp_hdr, obj_offset));
-
-  if (!UseBiasedLocking) {
-    null_check_offset = offset();
-  }
-  ldr(hdr, obj);
-
-  // Test if object is already locked
-  assert(markOopDesc::unlocked_value == 1, "adjust this code");
-  tbnz(hdr, exact_log2(markOopDesc::unlocked_value), fast_lock);
-
-  // Check for recursive locking
-  // See comments in InterpreterMacroAssembler::lock_object for
-  // explanations on the fast recursive locking check.
-  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
-  Assembler::LogicalImmediate imm(mask, false);
-  mov(tmp2, SP);
-  sub(tmp2, hdr, tmp2);
-  ands(tmp2, tmp2, imm);
-  b(slow_case, ne);
-
-  // Recursive locking: store 0 into a lock record
-  str(ZR, Address(disp_hdr, mark_offset));
-  b(fast_lock_done);
-
-#else // AARCH64
 
   if (!UseBiasedLocking) {
     null_check_offset = offset();
@@ -330,7 +238,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj,
   // else need slow case
   b(slow_case);
 
-#endif // AARCH64
 
   bind(fast_lock);
   // Save previous object header in BasicLock structure and update the header
diff --git a/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp b/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp
index b44ab31c6d8..b26c4524e2b 100644
--- a/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp
@@ -80,15 +80,8 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
   // Runtime1::exception_handler_for_pc
   if (_stub_id != Runtime1::forward_exception_id) {
     assert(frame_size() != no_frame_size, "cannot directly call forward_exception_id");
-#ifdef AARCH64
-    Label skip;
-    cbz(R3, skip);
-    jump(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type, Rtemp);
-    bind(skip);
-#else
     cmp(R3, 0);
     jump(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type, Rtemp, ne);
-#endif // AARCH64
   } else {
 #ifdef ASSERT
     // Should not have pending exception in forward_exception stub
@@ -124,43 +117,6 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
 #define __ sasm->
 
 // TODO: ARM - does this duplicate RegisterSaver in SharedRuntime?
-#ifdef AARCH64
-
-  //
-  // On AArch64 registers save area has the following layout:
-  //
-  // |---------------------|
-  // | return address (LR) |
-  // | FP                  |
-  // |---------------------|
-  // | D31                 |
-  // | ...                 |
-  // | D0                  |
-  // |---------------------|
-  // | padding             |
-  // |---------------------|
-  // | R28                 |
-  // | ...                 |
-  // | R0                  |
-  // |---------------------| <-- SP
-  //
-
-enum RegisterLayout {
-  number_of_saved_gprs = 29,
-  number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
-
-  R0_offset  = 0,
-  D0_offset  = R0_offset + number_of_saved_gprs + 1,
-  FP_offset  = D0_offset + number_of_saved_fprs,
-  LR_offset  = FP_offset + 1,
-
-  reg_save_size = LR_offset + 1,
-
-  arg1_offset = reg_save_size * wordSize,
-  arg2_offset = (reg_save_size + 1) * wordSize
-};
-
-#else
 
 enum RegisterLayout {
   fpu_save_size = pd_nof_fpu_regs_reg_alloc,
@@ -191,7 +147,6 @@ enum RegisterLayout {
   arg2_offset = (reg_save_size + 1) * wordSize
 };
 
-#endif // AARCH64
 
 static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers = HaveVFP) {
   sasm->set_frame_size(reg_save_size /* in words */);
@@ -200,19 +155,6 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers = H
   // Locations are offsets from sp after runtime call.
   OopMap* map = new OopMap(VMRegImpl::slots_per_word * reg_save_size, 0);
 
-#ifdef AARCH64
-  for (int i = 0; i < number_of_saved_gprs; i++) {
-    map->set_callee_saved(VMRegImpl::stack2reg((R0_offset + i) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
-  }
-  map->set_callee_saved(VMRegImpl::stack2reg(FP_offset * VMRegImpl::slots_per_word), FP->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(LR_offset * VMRegImpl::slots_per_word), LR->as_VMReg());
-
-  if (save_fpu_registers) {
-    for (int i = 0; i < number_of_saved_fprs; i++) {
-      map->set_callee_saved(VMRegImpl::stack2reg((D0_offset + i) * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
-    }
-  }
-#else
   int j=0;
   for (int i = R0_offset; i < R10_offset; i++) {
     if (j == FP_REG_NUM) {
@@ -235,7 +177,6 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers = H
       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
     }
   }
-#endif // AARCH64
 
   return map;
 }
@@ -244,29 +185,6 @@ static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers
   __ block_comment("save_live_registers");
   sasm->set_frame_size(reg_save_size /* in words */);
 
-#ifdef AARCH64
-  assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
-
-  __ raw_push(FP, LR);
-
-  __ sub(SP, SP, (reg_save_size - 2) * wordSize);
-
-  for (int i = 0; i < align_down((int)number_of_saved_gprs, 2); i += 2) {
-    __ stp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
-  }
-
-  if (is_odd(number_of_saved_gprs)) {
-    int i = number_of_saved_gprs - 1;
-    __ str(as_Register(i), Address(SP, (R0_offset + i) * wordSize));
-  }
-
-  if (save_fpu_registers) {
-    assert (is_even(number_of_saved_fprs), "adjust this code");
-    for (int i = 0; i < number_of_saved_fprs; i += 2) {
-      __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), Address(SP, (D0_offset + i) * wordSize));
-    }
-  }
-#else
   __ push(RegisterSet(FP) | RegisterSet(LR));
   __ push(RegisterSet(R0, R6) | RegisterSet(R8, R10) | R12 | altFP_7_11);
   if (save_fpu_registers) {
@@ -274,7 +192,6 @@ static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers
   } else {
     __ sub(SP, SP, fpu_save_size * wordSize);
   }
-#endif // AARCH64
 
   return generate_oop_map(sasm, save_fpu_registers);
 }
@@ -287,34 +204,6 @@ static void restore_live_registers(StubAssembler* sasm,
                                    bool restore_fpu_registers = HaveVFP) {
   __ block_comment("restore_live_registers");
 
-#ifdef AARCH64
-  if (restore_R0) {
-    __ ldr(R0, Address(SP, R0_offset * wordSize));
-  }
-
-  assert(is_odd(number_of_saved_gprs), "adjust this code");
-  for (int i = 1; i < number_of_saved_gprs; i += 2) {
-    __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
-  }
-
-  if (restore_fpu_registers) {
-    assert (is_even(number_of_saved_fprs), "adjust this code");
-    for (int i = 0; i < number_of_saved_fprs; i += 2) {
-      __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), Address(SP, (D0_offset + i) * wordSize));
-    }
-  }
-
-  __ add(SP, SP, (reg_save_size - 2) * wordSize);
-
-  if (restore_FP_LR) {
-    __ raw_pop(FP, LR);
-    if (do_return) {
-      __ ret();
-    }
-  } else {
-    assert (!do_return, "return without restoring FP/LR");
-  }
-#else
   if (restore_fpu_registers) {
     __ fpop(FloatRegisterSet(D0, fpu_save_size / 2));
     if (!restore_R0) {
@@ -329,7 +218,6 @@ static void restore_live_registers(StubAssembler* sasm,
   } else {
     assert (!do_return, "return without restoring FP/LR");
   }
-#endif // AARCH64
 }
 
 
@@ -341,11 +229,9 @@ static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registe
   restore_live_registers(sasm, true, true, true, restore_fpu_registers);
 }
 
-#ifndef AARCH64
 static void restore_live_registers_except_FP_LR(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) {
   restore_live_registers(sasm, true, false, false, restore_fpu_registers);
 }
-#endif // !AARCH64
 
 static void restore_live_registers_without_return(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) {
   restore_live_registers(sasm, true, true, false, restore_fpu_registers);
@@ -386,15 +272,8 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe
 static void restore_sp_for_method_handle(StubAssembler* sasm) {
   // Restore SP from its saved reg (FP) if the exception PC is a MethodHandle call site.
   __ ldr_s32(Rtemp, Address(Rthread, JavaThread::is_method_handle_return_offset()));
-#ifdef AARCH64
-  Label skip;
-  __ cbz(Rtemp, skip);
-  __ mov(SP, Rmh_SP_save);
-  __ bind(skip);
-#else
   __ cmp(Rtemp, 0);
   __ mov(SP, Rmh_SP_save, ne);
-#endif // AARCH64
 }
 
 
@@ -500,22 +379,12 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
 
   __ cmp_32(R0, 0);
 
-#ifdef AARCH64
-  Label call_deopt;
-
-  restore_live_registers_without_return(sasm);
-  __ b(call_deopt, ne);
-  __ ret();
-
-  __ bind(call_deopt);
-#else
   restore_live_registers_except_FP_LR(sasm);
   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
 
   // Deoptimization needed
   // TODO: ARM - no need to restore FP & LR because unpack_with_reexecution() stores them back
   __ pop(RegisterSet(FP) | RegisterSet(LR));
-#endif // AARCH64
 
   __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, Rtemp);
 
@@ -623,12 +492,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
           Label slow_case, slow_case_no_pop;
 
-#ifdef AARCH64
-          __ mov_slow(Rtemp, C1_MacroAssembler::max_array_allocation_length);
-          __ cmp_32(length, Rtemp);
-#else
           __ cmp_32(length, C1_MacroAssembler::max_array_allocation_length);
-#endif // AARCH64
           __ b(slow_case_no_pop, hs);
 
           // Free some temporary registers
@@ -645,12 +509,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
           __ mov(arr_size, MinObjAlignmentInBytesMask);
           __ and_32(tmp2, tmp1, (unsigned int)(Klass::_lh_header_size_mask << Klass::_lh_header_size_shift));
 
-#ifdef AARCH64
-          __ lslv_w(tmp3, length, tmp1);
-          __ add(arr_size, arr_size, tmp3);
-#else
           __ add(arr_size, arr_size, AsmOperand(length, lsl, tmp1));
-#endif // AARCH64
 
           __ add(arr_size, arr_size, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift));
           __ align_reg(arr_size, arr_size, MinObjAlignmentInBytes);
@@ -716,15 +575,8 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         __ load_klass(Rtemp, R0);
         __ ldr_u32(Rtemp, Address(Rtemp, Klass::access_flags_offset()));
 
-#ifdef AARCH64
-        Label L;
-        __ tbnz(Rtemp, exact_log2(JVM_ACC_HAS_FINALIZER), L);
-        __ ret();
-        __ bind(L);
-#else
         __ tst(Rtemp, JVM_ACC_HAS_FINALIZER);
         __ bx(LR, eq);
-#endif // AARCH64
 
         // Call VM
         OopMap* map = save_live_registers(sasm);
@@ -746,9 +598,6 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
     case throw_index_exception_id:
       {
         __ set_info("index_range_check_failed", dont_gc_arguments);
-#ifdef AARCH64
-        __ NOT_TESTED();
-#endif
         oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
       }
       break;
@@ -806,9 +655,6 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
     case throw_incompatible_class_change_error_id:
       {
         __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments);
-#ifdef AARCH64
-        __ NOT_TESTED();
-#endif
         oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
       }
       break;
@@ -892,7 +738,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         restore_live_registers_without_return(sasm);
         DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
         assert(deopt_blob != NULL, "deoptimization blob must have been created");
-        __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg));
+        __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, noreg);
       }
       break;
 
diff --git a/src/hotspot/cpu/arm/c2_globals_arm.hpp b/src/hotspot/cpu/arm/c2_globals_arm.hpp
index 9e19c53f8b6..bb9c6d480c8 100644
--- a/src/hotspot/cpu/arm/c2_globals_arm.hpp
+++ b/src/hotspot/cpu/arm/c2_globals_arm.hpp
@@ -39,27 +39,15 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false);
 define_pd_global(bool, ProfileTraps,                 true);
 define_pd_global(bool, UseOnStackReplacement,        true);
 define_pd_global(bool, ProfileInterpreter,           true);
-#ifdef AARCH64
-define_pd_global(bool, TieredCompilation,            trueInTiered);
-#else
 define_pd_global(bool, TieredCompilation,            false);
-#endif
 define_pd_global(intx, CompileThreshold,             10000);
 
 define_pd_global(intx, OnStackReplacePercentage,     140);
 define_pd_global(intx, ConditionalMoveLimit,         4);
 // C2 gets to use all the float/double registers
-#ifdef AARCH64
-define_pd_global(intx, FLOATPRESSURE,                31);
-#else
 define_pd_global(intx, FLOATPRESSURE,                30);
-#endif
 define_pd_global(intx, FreqInlineSize,               175);
-#ifdef AARCH64
-define_pd_global(intx, INTPRESSURE,                  27);
-#else
 define_pd_global(intx, INTPRESSURE,                  12);
-#endif
 define_pd_global(intx, InteriorEntryAlignment,       16);  // = CodeEntryAlignment
 define_pd_global(size_t, NewSizeThreadIncrease,      ScaleForWordSize(4*K));
 // The default setting 16/16 seems to work best.
diff --git a/src/hotspot/cpu/arm/frame_arm.cpp b/src/hotspot/cpu/arm/frame_arm.cpp
index e59afff6dd9..ef96559ca9c 100644
--- a/src/hotspot/cpu/arm/frame_arm.cpp
+++ b/src/hotspot/cpu/arm/frame_arm.cpp
@@ -304,26 +304,12 @@ void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) {
   *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value;
 }
 
-#ifdef AARCH64
-
-// Used by template based interpreter deoptimization
-void frame::interpreter_frame_set_stack_top(intptr_t* stack_top) {
-  *((intptr_t**)addr_at(interpreter_frame_stack_top_offset)) = stack_top;
-}
-
-// Used by template based interpreter deoptimization
-void frame::interpreter_frame_set_extended_sp(intptr_t* sp) {
-  *((intptr_t**)addr_at(interpreter_frame_extended_sp_offset)) = sp;
-}
-
-#else
 
 // Used by template based interpreter deoptimization
 void frame::interpreter_frame_set_last_sp(intptr_t* sp) {
     *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp;
 }
 
-#endif // AARCH64
 
 frame frame::sender_for_entry_frame(RegisterMap* map) const {
   assert(map != NULL, "map must be set");
@@ -334,18 +320,12 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const {
   assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack");
   map->clear();
   assert(map->include_argument_oops(), "should be set by clear");
-#ifdef AARCH64
-  assert (jfa->last_Java_pc() != NULL, "pc should be stored");
-  frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
-  return fr;
-#else
   if (jfa->last_Java_pc() != NULL) {
     frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
     return fr;
   }
   frame fr(jfa->last_Java_sp(), jfa->last_Java_fp());
   return fr;
-#endif // AARCH64
 }
 
 //------------------------------------------------------------------------------
@@ -403,10 +383,6 @@ void frame::adjust_unextended_sp() {
 void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
   // see x86 for comments
   map->set_location(FP->as_VMReg(), (address) link_addr);
-#ifdef AARCH64
-  // also adjust a high part of register
-  map->set_location(FP->as_VMReg()->next(), (address) link_addr);
-#endif // AARCH64
 }
 
 frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
@@ -539,14 +515,6 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result)
   if (method->is_native()) {
     // Prior to calling into the runtime to report the method_exit both of
     // the possible return value registers are saved.
-#ifdef AARCH64
-    // Return value registers are saved into the frame
-    if (type == T_FLOAT || type == T_DOUBLE) {
-      res_addr = addr_at(interpreter_frame_fp_saved_result_offset);
-    } else {
-      res_addr = addr_at(interpreter_frame_gp_saved_result_offset);
-    }
-#else
     // Return value registers are pushed to the native stack
     res_addr = (intptr_t*)sp();
 #ifdef __ABI_HARD__
@@ -555,7 +523,6 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result)
       res_addr += 2;
     }
 #endif // __ABI_HARD__
-#endif // AARCH64
   } else {
     res_addr = (intptr_t*)interpreter_frame_tos_address();
   }
@@ -602,12 +569,7 @@ intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
 void frame::describe_pd(FrameValues& values, int frame_no) {
   if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
-#ifdef AARCH64
-    DESCRIBE_FP_OFFSET(interpreter_frame_stack_top);
-    DESCRIBE_FP_OFFSET(interpreter_frame_extended_sp);
-#else
     DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
-#endif // AARCH64
     DESCRIBE_FP_OFFSET(interpreter_frame_method);
     DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
     DESCRIBE_FP_OFFSET(interpreter_frame_cache);
@@ -631,7 +593,6 @@ intptr_t *frame::initial_deoptimization_info() {
 }
 
 intptr_t* frame::real_fp() const {
-#ifndef AARCH64
   if (is_entry_frame()) {
     // Work-around: FP (currently) does not conform to the ABI for entry
     // frames (see generate_call_stub). Might be worth fixing as another CR.
@@ -644,7 +605,6 @@ intptr_t* frame::real_fp() const {
 #endif
     return new_fp;
   }
-#endif // !AARCH64
   if (_cb != NULL) {
     // use the frame size if valid
     int size = _cb->frame_size();
diff --git a/src/hotspot/cpu/arm/frame_arm.hpp b/src/hotspot/cpu/arm/frame_arm.hpp
index 9d0081cf946..eba3fc58c78 100644
--- a/src/hotspot/cpu/arm/frame_arm.hpp
+++ b/src/hotspot/cpu/arm/frame_arm.hpp
@@ -37,22 +37,12 @@
     sender_sp_offset                                 =  2,
 
     // Interpreter frames
-#ifdef AARCH64
-    interpreter_frame_gp_saved_result_offset         =  4, // for native calls only
-    interpreter_frame_fp_saved_result_offset         =  3, // for native calls only
-#endif
     interpreter_frame_oop_temp_offset                =  2, // for native calls only
 
     interpreter_frame_sender_sp_offset               = -1,
-#ifdef AARCH64
-    interpreter_frame_stack_top_offset               = interpreter_frame_sender_sp_offset - 1,
-    interpreter_frame_extended_sp_offset             = interpreter_frame_stack_top_offset - 1,
-    interpreter_frame_method_offset                  = interpreter_frame_extended_sp_offset - 1,
-#else
     // outgoing sp before a call to an invoked method
     interpreter_frame_last_sp_offset                 = interpreter_frame_sender_sp_offset - 1,
     interpreter_frame_method_offset                  = interpreter_frame_last_sp_offset - 1,
-#endif // AARCH64
     interpreter_frame_mirror_offset                  = interpreter_frame_method_offset - 1,
     interpreter_frame_mdp_offset                     = interpreter_frame_mirror_offset - 1,
     interpreter_frame_cache_offset                   = interpreter_frame_mdp_offset - 1,
@@ -64,7 +54,7 @@
     interpreter_frame_monitor_block_bottom_offset    = interpreter_frame_initial_sp_offset,
 
     // Entry frames
-    entry_frame_call_wrapper_offset                  =  AARCH64_ONLY(2) NOT_AARCH64(0)
+    entry_frame_call_wrapper_offset                  =  0
   };
 
   intptr_t ptr_at(int offset) const {
@@ -107,9 +97,7 @@
 
   frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc);
 
-#ifndef AARCH64
   frame(intptr_t* sp, intptr_t* fp);
-#endif // !AARCH64
 
   void init(intptr_t* sp, intptr_t* fp, address pc);
 
@@ -119,18 +107,11 @@
 
   inline address* sender_pc_addr() const;
 
-#ifdef AARCH64
-  // Used by template based interpreter deoptimization
-  void interpreter_frame_set_stack_top(intptr_t* stack_top);
-  void interpreter_frame_set_extended_sp(intptr_t* sp);
-
-#else
   // expression stack tos if we are nested in a java call
   intptr_t* interpreter_frame_last_sp() const;
 
   // deoptimization support
   void interpreter_frame_set_last_sp(intptr_t* sp);
-#endif // AARCH64
 
   // helper to update a map with callee-saved FP
   static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr);
diff --git a/src/hotspot/cpu/arm/frame_arm.inline.hpp b/src/hotspot/cpu/arm/frame_arm.inline.hpp
index 132006c09da..41212b6b25a 100644
--- a/src/hotspot/cpu/arm/frame_arm.inline.hpp
+++ b/src/hotspot/cpu/arm/frame_arm.inline.hpp
@@ -83,7 +83,6 @@ inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address
   }
 }
 
-#ifndef AARCH64
 
 inline frame::frame(intptr_t* sp, intptr_t* fp) {
   _sp = sp;
@@ -104,7 +103,6 @@ inline frame::frame(intptr_t* sp, intptr_t* fp) {
   }
 }
 
-#endif // !AARCH64
 
 // Accessors
 
@@ -152,11 +150,9 @@ inline intptr_t** frame::interpreter_frame_locals_addr() const {
   return (intptr_t**)addr_at(interpreter_frame_locals_offset);
 }
 
-#ifndef AARCH64
 inline intptr_t* frame::interpreter_frame_last_sp() const {
   return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset);
 }
-#endif // !AARCH64
 
 inline intptr_t* frame::interpreter_frame_bcp_addr() const {
   return (intptr_t*)addr_at(interpreter_frame_bcp_offset);
@@ -185,12 +181,6 @@ inline oop* frame::interpreter_frame_mirror_addr() const {
 
 // top of expression stack
 inline intptr_t* frame::interpreter_frame_tos_address() const {
-#ifdef AARCH64
-  intptr_t* stack_top = (intptr_t*)*addr_at(interpreter_frame_stack_top_offset);
-  assert(stack_top != NULL, "should be stored before call");
-  assert(stack_top <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
-  return stack_top;
-#else
   intptr_t* last_sp = interpreter_frame_last_sp();
   if (last_sp == NULL ) {
     return sp();
@@ -201,7 +191,6 @@ inline intptr_t* frame::interpreter_frame_tos_address() const {
     assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
     return last_sp;
   }
-#endif // AARCH64
 }
 
 inline oop* frame::interpreter_frame_temp_oop_addr() const {
diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
index 34b5a6d519a..d695107ea93 100644
--- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
@@ -60,27 +60,16 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
 
     BLOCK_COMMENT("PreBarrier");
 
-#ifdef AARCH64
-    callee_saved_regs = align_up(callee_saved_regs, 2);
-    for (int i = 0; i < callee_saved_regs; i += 2) {
-      __ raw_push(as_Register(i), as_Register(i+1));
-    }
-#else
     RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1));
     __ push(saved_regs | R9ifScratched);
-#endif // AARCH64
 
     if (addr != R0) {
       assert_different_registers(count, R0);
       __ mov(R0, addr);
     }
-#ifdef AARCH64
-    __ zero_extend(R1, count, 32); // G1BarrierSetRuntime::write_ref_array_pre_*_entry takes size_t
-#else
     if (count != R1) {
       __ mov(R1, count);
     }
-#endif // AARCH64
 
     if (UseCompressedOops) {
       __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry));
@@ -88,13 +77,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
       __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry));
     }
 
-#ifdef AARCH64
-    for (int i = callee_saved_regs - 2; i >= 0; i -= 2) {
-      __ raw_pop(as_Register(i), as_Register(i+1));
-    }
-#else
     __ pop(saved_regs | R9ifScratched);
-#endif // AARCH64
   }
 }
 
@@ -106,9 +89,6 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
     assert_different_registers(count, R0);
     __ mov(R0, addr);
   }
-#ifdef AARCH64
-  __ zero_extend(R1, count, 32); // G1BarrierSetRuntime::write_ref_array_post_entry takes size_t
-#else
   if (count != R1) {
     __ mov(R1, count);
   }
@@ -120,17 +100,14 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
   // difficult for this particular call site.
   __ push(R9);
 #endif // !R9_IS_SCRATCHED
-#endif // !AARCH64
   __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry));
-#ifndef AARCH64
 #if R9_IS_SCRATCHED
   __ pop(R9);
 #endif // !R9_IS_SCRATCHED
-#endif // !AARCH64
 }
 
 // G1 pre-barrier.
-// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Blows all volatile registers R0-R3, Rtemp, LR.
 // If store_addr != noreg, then previous value is loaded from [store_addr];
 // in such case store_addr and new_val registers are preserved;
 // otherwise pre_val register is preserved.
@@ -186,20 +163,12 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
   __ bind(runtime);
 
   // save the live input values
-#ifdef AARCH64
-  if (store_addr != noreg) {
-    __ raw_push(store_addr, new_val);
-  } else {
-    __ raw_push(pre_val, ZR);
-  }
-#else
   if (store_addr != noreg) {
     // avoid raw_push to support any ordering of store_addr and new_val
     __ push(RegisterSet(store_addr) | RegisterSet(new_val));
   } else {
     __ push(pre_val);
   }
-#endif // AARCH64
 
   if (pre_val != R0) {
     __ mov(R0, pre_val);
@@ -208,25 +177,17 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
 
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), R0, R1);
 
-#ifdef AARCH64
-  if (store_addr != noreg) {
-    __ raw_pop(store_addr, new_val);
-  } else {
-    __ raw_pop(pre_val, ZR);
-  }
-#else
   if (store_addr != noreg) {
     __ pop(RegisterSet(store_addr) | RegisterSet(new_val));
   } else {
     __ pop(pre_val);
   }
-#endif // AARCH64
 
   __ bind(done);
 }
 
 // G1 post-barrier.
-// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Blows all volatile registers R0-R3, Rtemp, LR.
 void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                            Register store_addr,
                                            Register new_val,
@@ -246,13 +207,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
   // Does store cross heap regions?
 
   __ eor(tmp1, store_addr, new_val);
-#ifdef AARCH64
-  __ logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
-  __ cbz(tmp1, done);
-#else
   __ movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
   __ b(done, eq);
-#endif
 
   // crosses regions, storing NULL?
 
@@ -333,12 +289,8 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
   const Register store_addr = obj.base();
   if (obj.index() != noreg) {
     assert (obj.disp() == 0, "index or displacement, not both");
-#ifdef AARCH64
-    __ add(store_addr, obj.base(), obj.index(), obj.extend(), obj.shift_imm());
-#else
     assert(obj.offset_op() == add_offset, "addition is expected");
     __ add(store_addr, obj.base(), AsmOperand(obj.index(), obj.shift(), obj.shift_imm()));
-#endif // AARCH64
   } else if (obj.disp() != 0) {
     __ add(store_addr, obj.base(), obj.disp());
   }
@@ -415,16 +367,10 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
   __ set_info("g1_pre_barrier_slow_id", false);
 
   // save at least the registers that need saving if the runtime is called
-#ifdef AARCH64
-  __ raw_push(R0, R1);
-  __ raw_push(R2, R3);
-  const int nb_saved_regs = 4;
-#else // AARCH64
   const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
   const int nb_saved_regs = 6;
   assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
   __ push(saved_regs);
-#endif // AARCH64
 
   const Register r_pre_val_0  = R0; // must be R0, to be ready for the runtime call
   const Register r_index_1    = R1;
@@ -454,12 +400,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
 
   __ bind(done);
 
-#ifdef AARCH64
-  __ raw_pop(R2, R3);
-  __ raw_pop(R0, R1);
-#else // AARCH64
   __ pop(saved_regs);
-#endif // AARCH64
 
   __ ret();
 
@@ -492,16 +433,10 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*
   AddressLiteral cardtable(ci_card_table_address_as<address>(), relocInfo::none);
 
   // save at least the registers that need saving if the runtime is called
-#ifdef AARCH64
-  __ raw_push(R0, R1);
-  __ raw_push(R2, R3);
-  const int nb_saved_regs = 4;
-#else // AARCH64
   const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
   const int nb_saved_regs = 6;
   assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
   __ push(saved_regs);
-#endif // AARCH64
 
   const Register r_card_addr_0 = R0; // must be R0 for the slow case
   const Register r_obj_0 = R0;
@@ -528,12 +463,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*
 
   __ bind(done);
 
-#ifdef AARCH64
-  __ raw_pop(R2, R3);
-  __ raw_pop(R0, R1);
-#else // AARCH64
   __ pop(saved_regs);
-#endif // AARCH64
 
   __ ret();
 
diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
index 3044dc86a6c..c570b89f79f 100644
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
@@ -35,12 +35,6 @@ void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators,
   case T_OBJECT:
   case T_ARRAY: {
     if (in_heap) {
-#ifdef AARCH64
-      if (UseCompressedOops) {
-        __ ldr_w(dst, src);
-        __ decode_heap_oop(dst);
-      } else
-#endif // AARCH64
       {
         __ ldr(dst, src);
       }
@@ -63,15 +57,6 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators
   case T_OBJECT:
   case T_ARRAY: {
     if (in_heap) {
-#ifdef AARCH64
-      if (UseCompressedOops) {
-        assert(!dst.uses(src), "not enough registers");
-        if (!is_null) {
-          __ encode_heap_oop(src);
-        }
-        __ str_w(val, obj);
-      } else
-#endif // AARCH64
       {
         __ str(val, obj);
       }
diff --git a/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp
index c538561ca5e..50f5b9a4aec 100644
--- a/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp
@@ -119,7 +119,6 @@ void CardTableBarrierSetAssembler::store_check_part1(MacroAssembler* masm, Regis
      Possible cause is a cache miss (card table base address resides in a
      rarely accessed area of thread descriptor).
   */
-  // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64
   __ mov_address(card_table_base, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
 }
 
@@ -136,12 +135,7 @@ void CardTableBarrierSetAssembler::store_check_part2(MacroAssembler* masm, Regis
   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "Adjust store check code");
 
   assert(CardTable::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations.");
-#ifdef AARCH64
-  add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTable::card_shift));
-  Address card_table_addr(card_table_base);
-#else
   Address card_table_addr(card_table_base, obj, lsr, CardTable::card_shift);
-#endif
 
   if (UseCondCardMark) {
     if (ct->scanned_concurrently()) {
@@ -164,9 +158,6 @@ void CardTableBarrierSetAssembler::store_check_part2(MacroAssembler* masm, Regis
 }
 
 void CardTableBarrierSetAssembler::set_card(MacroAssembler* masm, Register card_table_base, Address card_table_addr, Register tmp) {
-#ifdef AARCH64
-  strb(ZR, card_table_addr);
-#else
   CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
   CardTable* ct = ctbs->card_table();
   if ((((uintptr_t)ct->byte_map_base() & 0xff) == 0)) {
@@ -178,5 +169,4 @@ void CardTableBarrierSetAssembler::set_card(MacroAssembler* masm, Register card_
     __ mov(tmp, 0);
     __ strb(tmp, card_table_addr);
   }
-#endif // AARCH64
 }
diff --git a/src/hotspot/cpu/arm/globalDefinitions_arm.hpp b/src/hotspot/cpu/arm/globalDefinitions_arm.hpp
index 29a856d8f75..10e97dfb84f 100644
--- a/src/hotspot/cpu/arm/globalDefinitions_arm.hpp
+++ b/src/hotspot/cpu/arm/globalDefinitions_arm.hpp
@@ -25,19 +25,7 @@
 #ifndef CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP
 #define CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP
 
-#ifdef AARCH64
-#define AARCH64_ONLY(code) code
-#define AARCH64_ONLY_ARG(arg) , arg
-#define NOT_AARCH64(code)
-#define NOT_AARCH64_ARG(arg)
-#else
-#define AARCH64_ONLY(code)
-#define AARCH64_ONLY_ARG(arg)
-#define NOT_AARCH64(code) code
-#define NOT_AARCH64_ARG(arg) , arg
-#endif
-
-const int StackAlignmentInBytes = AARCH64_ONLY(16) NOT_AARCH64(8);
+const int StackAlignmentInBytes = 8;
 
 // Indicates whether the C calling conventions require that
 // 32-bit integer argument values are extended to 64 bits.
@@ -49,24 +37,19 @@ const bool HaveVFP = false;
 const bool HaveVFP = true;
 #endif
 
-#if defined(__ARM_PCS_VFP) || defined(AARCH64)
+#if defined(__ARM_PCS_VFP)
 #define __ABI_HARD__
 #endif
 
-#if defined(__ARM_ARCH_7A__) || defined(AARCH64)
+#if defined(__ARM_ARCH_7A__)
 #define SUPPORTS_NATIVE_CX8
 #endif
 
 #define STUBROUTINES_MD_HPP    "stubRoutines_arm.hpp"
 #define INTERP_MASM_MD_HPP     "interp_masm_arm.hpp"
 #define TEMPLATETABLE_MD_HPP   "templateTable_arm.hpp"
-#ifdef AARCH64
-#define ADGLOBALS_MD_HPP       "adfiles/adGlobals_arm_64.hpp"
-#define AD_MD_HPP              "adfiles/ad_arm_64.hpp"
-#else
 #define ADGLOBALS_MD_HPP       "adfiles/adGlobals_arm_32.hpp"
 #define AD_MD_HPP              "adfiles/ad_arm_32.hpp"
-#endif
 #define C1_LIRGENERATOR_MD_HPP "c1_LIRGenerator_arm.hpp"
 
 #endif // CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP
diff --git a/src/hotspot/cpu/arm/globals_arm.hpp b/src/hotspot/cpu/arm/globals_arm.hpp
index fd044dabd83..a779fd61ea0 100644
--- a/src/hotspot/cpu/arm/globals_arm.hpp
+++ b/src/hotspot/cpu/arm/globals_arm.hpp
@@ -86,13 +86,5 @@ define_pd_global(bool, ThreadLocalHandshakes, false);
                    notproduct, \
                    range, \
                    constraint, \
-                   writeable) \
-                                                                                        \
-  develop(bool, VerifyInterpreterStackTop, false,                                       \
-          "Verify interpreter stack top at every stack expansion (AArch64 only)")       \
-                                                                                        \
-  develop(bool, ZapHighNonSignificantBits, false,                                       \
-          "Zap high non-significant bits of values (AArch64 only)")                     \
-                                                                                        \
-
+                   writeable)
 #endif // CPU_ARM_VM_GLOBALS_ARM_HPP
diff --git a/src/hotspot/cpu/arm/icBuffer_arm.cpp b/src/hotspot/cpu/arm/icBuffer_arm.cpp
index b158572c0dd..03a53ec19bb 100644
--- a/src/hotspot/cpu/arm/icBuffer_arm.cpp
+++ b/src/hotspot/cpu/arm/icBuffer_arm.cpp
@@ -35,7 +35,7 @@
 #define __ masm->
 
 int InlineCacheBuffer::ic_stub_code_size() {
-  return (AARCH64_ONLY(8) NOT_AARCH64(4)) * Assembler::InstructionSize;
+  return (4 * Assembler::InstructionSize);
 }
 
 void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
diff --git a/src/hotspot/cpu/arm/icache_arm.cpp b/src/hotspot/cpu/arm/icache_arm.cpp
index ae163619fdc..7f258ab8b27 100644
--- a/src/hotspot/cpu/arm/icache_arm.cpp
+++ b/src/hotspot/cpu/arm/icache_arm.cpp
@@ -29,49 +29,12 @@
 
 #define __ _masm->
 
-#ifdef AARCH64
-
-static int icache_flush(address addr, int lines, int magic) {
-  // TODO-AARCH64 Figure out actual cache line size (mrs Xt, CTR_EL0)
-
-  address p = addr;
-  for (int i = 0; i < lines; i++, p += ICache::line_size) {
-    __asm__ volatile(
-      " dc cvau, %[p]"
-      :
-      : [p] "r" (p)
-      : "memory");
-  }
-
-  __asm__ volatile(
-    " dsb ish"
-    : : : "memory");
-
-  p = addr;
-  for (int i = 0; i < lines; i++, p += ICache::line_size) {
-    __asm__ volatile(
-      " ic ivau, %[p]"
-      :
-      : [p] "r" (p)
-      : "memory");
-  }
-
-  __asm__ volatile(
-    " dsb ish\n\t"
-    " isb\n\t"
-    : : : "memory");
-
-  return magic;
-}
-
-#else
 
 static int icache_flush(address addr, int lines, int magic) {
   __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size));
   return magic;
 }
 
-#endif // AARCH64
 
 void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
   address start = (address)icache_flush;
diff --git a/src/hotspot/cpu/arm/interp_masm_arm.cpp b/src/hotspot/cpu/arm/interp_masm_arm.cpp
index 273f92bdf1a..c12da723159 100644
--- a/src/hotspot/cpu/arm/interp_masm_arm.cpp
+++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp
@@ -54,7 +54,7 @@ InterpreterMacroAssembler::InterpreterMacroAssembler(CodeBuffer* code) : MacroAs
 }
 
 void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
-#if defined(ASSERT) && !defined(AARCH64)
+#ifdef ASSERT
   // Ensure that last_sp is not filled.
   { Label L;
     ldr(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
@@ -62,27 +62,15 @@ void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entr
     stop("InterpreterMacroAssembler::call_VM_helper: last_sp != NULL");
     bind(L);
   }
-#endif // ASSERT && !AARCH64
+#endif // ASSERT
 
   // Rbcp must be saved/restored since it may change due to GC.
   save_bcp();
 
-#ifdef AARCH64
-  check_no_cached_stack_top(Rtemp);
-  save_stack_top();
-  check_extended_sp(Rtemp);
-  cut_sp_before_call();
-#endif // AARCH64
 
   // super call
   MacroAssembler::call_VM_helper(oop_result, entry_point, number_of_arguments, check_exceptions);
 
-#ifdef AARCH64
-  // Restore SP to extended SP
-  restore_sp_after_call(Rtemp);
-  check_stack_top();
-  clear_cached_stack_top();
-#endif // AARCH64
 
   // Restore interpreter specific registers.
   restore_bcp();
@@ -128,10 +116,8 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
   const Address tos_addr(thread_state, JvmtiThreadState::earlyret_tos_offset());
   const Address oop_addr(thread_state, JvmtiThreadState::earlyret_oop_offset());
   const Address val_addr(thread_state, JvmtiThreadState::earlyret_value_offset());
-#ifndef AARCH64
   const Address val_addr_hi(thread_state, JvmtiThreadState::earlyret_value_offset()
                              + in_ByteSize(wordSize));
-#endif // !AARCH64
 
   Register zero = zero_register(Rtemp);
 
@@ -141,11 +127,7 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
                interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
                break;
 
-#ifdef AARCH64
-    case ltos: ldr(R0_tos, val_addr);              break;
-#else
     case ltos: ldr(R1_tos_hi, val_addr_hi);        // fall through
-#endif // AARCH64
     case btos:                                     // fall through
     case ztos:                                     // fall through
     case ctos:                                     // fall through
@@ -163,9 +145,7 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
   }
   // Clean up tos value in the thread object
   str(zero, val_addr);
-#ifndef AARCH64
   str(zero, val_addr_hi);
-#endif // !AARCH64
 
   mov(Rtemp, (int) ilgl);
   str_32(Rtemp, tos_addr);
@@ -220,7 +200,6 @@ void InterpreterMacroAssembler::get_index_at_bcp(Register index, int bcp_offset,
     ldrb(tmp_reg, Address(Rbcp, bcp_offset));
     orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
   } else if (index_size == sizeof(u4)) {
-    // TODO-AARCH64: consider using unaligned access here
     ldrb(index, Address(Rbcp, bcp_offset+3));
     ldrb(tmp_reg, Address(Rbcp, bcp_offset+2));
     orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
@@ -252,7 +231,6 @@ void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Regis
 
   // convert from field index to ConstantPoolCacheEntry index
   assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
-  // TODO-AARCH64 merge this shift with shift "add(..., Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord))" after this method is called
   logical_shift_left(index, index, 2);
 }
 
@@ -261,13 +239,8 @@ void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register
   get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
   // caution index and bytecode can be the same
   add(bytecode, cache, AsmOperand(index, lsl, LogBytesPerWord));
-#ifdef AARCH64
-  add(bytecode, bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
-  ldarb(bytecode, bytecode);
-#else
   ldrb(bytecode, Address(bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())));
   TemplateTable::volatile_barrier(MacroAssembler::LoadLoad, noreg, true);
-#endif // AARCH64
 }
 
 // Sets cache. Blows reg_tmp.
@@ -364,31 +337,21 @@ void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
   ldr(supers_arr, Address(Rsub_klass, Klass::secondary_supers_offset()));
 
   ldr_u32(supers_cnt, Address(supers_arr, Array<Klass*>::length_offset_in_bytes())); // Load the array length
-#ifdef AARCH64
-  cbz(supers_cnt, not_subtype);
-  add(supers_arr, supers_arr, Array<Klass*>::base_offset_in_bytes());
-#else
   cmp(supers_cnt, 0);
 
   // Skip to the start of array elements and prefetch the first super-klass.
   ldr(cur_super, Address(supers_arr, Array<Klass*>::base_offset_in_bytes(), pre_indexed), ne);
   b(not_subtype, eq);
-#endif // AARCH64
 
   bind(loop);
 
-#ifdef AARCH64
-  ldr(cur_super, Address(supers_arr, wordSize, post_indexed));
-#endif // AARCH64
 
   cmp(cur_super, Rsuper_klass);
   b(update_cache, eq);
 
   subs(supers_cnt, supers_cnt, 1);
 
-#ifndef AARCH64
   ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne);
-#endif // !AARCH64
 
   b(loop, ne);
 
@@ -418,33 +381,18 @@ void InterpreterMacroAssembler::pop_i(Register r) {
   zap_high_non_significant_bits(r);
 }
 
-#ifdef AARCH64
-void InterpreterMacroAssembler::pop_l(Register r) {
-  assert(r != Rstack_top, "unpredictable instruction");
-  ldr(r, Address(Rstack_top, 2*wordSize, post_indexed));
-}
-#else
 void InterpreterMacroAssembler::pop_l(Register lo, Register hi) {
   assert_different_registers(lo, hi);
   assert(lo < hi, "lo must be < hi");
   pop(RegisterSet(lo) | RegisterSet(hi));
 }
-#endif // AARCH64
 
 void InterpreterMacroAssembler::pop_f(FloatRegister fd) {
-#ifdef AARCH64
-  ldr_s(fd, Address(Rstack_top, wordSize, post_indexed));
-#else
   fpops(fd);
-#endif // AARCH64
 }
 
 void InterpreterMacroAssembler::pop_d(FloatRegister fd) {
-#ifdef AARCH64
-  ldr_d(fd, Address(Rstack_top, 2*wordSize, post_indexed));
-#else
   fpopd(fd);
-#endif // AARCH64
 }
 
 
@@ -457,11 +405,7 @@ void InterpreterMacroAssembler::pop(TosState state) {
     case ctos:                                               // fall through
     case stos:                                               // fall through
     case itos: pop_i(R0_tos);                                break;
-#ifdef AARCH64
-    case ltos: pop_l(R0_tos);                                break;
-#else
     case ltos: pop_l(R0_tos_lo, R1_tos_hi);                  break;
-#endif // AARCH64
 #ifdef __SOFTFP__
     case ftos: pop_i(R0_tos);                                break;
     case dtos: pop_l(R0_tos_lo, R1_tos_hi);                  break;
@@ -487,36 +431,18 @@ void InterpreterMacroAssembler::push_i(Register r) {
   check_stack_top_on_expansion();
 }
 
-#ifdef AARCH64
-void InterpreterMacroAssembler::push_l(Register r) {
-  assert(r != Rstack_top, "unpredictable instruction");
-  stp(r, ZR, Address(Rstack_top, -2*wordSize, pre_indexed));
-  check_stack_top_on_expansion();
-}
-#else
 void InterpreterMacroAssembler::push_l(Register lo, Register hi) {
   assert_different_registers(lo, hi);
   assert(lo < hi, "lo must be < hi");
   push(RegisterSet(lo) | RegisterSet(hi));
 }
-#endif // AARCH64
 
 void InterpreterMacroAssembler::push_f() {
-#ifdef AARCH64
-  str_s(S0_tos, Address(Rstack_top, -wordSize, pre_indexed));
-  check_stack_top_on_expansion();
-#else
   fpushs(S0_tos);
-#endif // AARCH64
 }
 
 void InterpreterMacroAssembler::push_d() {
-#ifdef AARCH64
-  str_d(D0_tos, Address(Rstack_top, -2*wordSize, pre_indexed));
-  check_stack_top_on_expansion();
-#else
   fpushd(D0_tos);
-#endif // AARCH64
 }
 
 // Transition state -> vtos. Blows Rtemp.
@@ -529,11 +455,7 @@ void InterpreterMacroAssembler::push(TosState state) {
     case ctos:                                                // fall through
     case stos:                                                // fall through
     case itos: push_i(R0_tos);                                break;
-#ifdef AARCH64
-    case ltos: push_l(R0_tos);                                break;
-#else
     case ltos: push_l(R0_tos_lo, R1_tos_hi);                  break;
-#endif // AARCH64
 #ifdef __SOFTFP__
     case ftos: push_i(R0_tos);                                break;
     case dtos: push_l(R0_tos_lo, R1_tos_hi);                  break;
@@ -547,7 +469,6 @@ void InterpreterMacroAssembler::push(TosState state) {
 }
 
 
-#ifndef AARCH64
 
 // Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value.
 void InterpreterMacroAssembler::convert_retval_to_tos(TosState state) {
@@ -575,7 +496,6 @@ void InterpreterMacroAssembler::convert_tos_to_retval(TosState state) {
 #endif // !__SOFTFP__ && !__ABI_HARD__
 }
 
-#endif // !AARCH64
 
 
 // Helpers for swap and dup
@@ -589,20 +509,12 @@ void InterpreterMacroAssembler::store_ptr(int n, Register val) {
 
 
 void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
-#ifdef AARCH64
-  check_no_cached_stack_top(Rtemp);
-  save_stack_top();
-  cut_sp_before_call();
-  mov(Rparams, Rstack_top);
-#endif // AARCH64
 
   // set sender sp
   mov(Rsender_sp, SP);
 
-#ifndef AARCH64
   // record last_sp
   str(Rsender_sp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
-#endif // !AARCH64
 }
 
 // Jump to from_interpreted entry of a call unless single stepping is possible
@@ -618,19 +530,8 @@ void InterpreterMacroAssembler::jump_from_interpreted(Register method) {
     // interp_only_mode if these events CAN be enabled.
 
     ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
-#ifdef AARCH64
-    {
-      Label not_interp_only_mode;
-
-      cbz(Rtemp, not_interp_only_mode);
-      indirect_jump(Address(method, Method::interpreter_entry_offset()), Rtemp);
-
-      bind(not_interp_only_mode);
-    }
-#else
     cmp(Rtemp, 0);
     ldr(PC, Address(method, Method::interpreter_entry_offset()), ne);
-#endif // AARCH64
   }
 
   indirect_jump(Address(method, Method::from_interpreted_offset()), Rtemp);
@@ -657,12 +558,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
                                               bool verifyoop) {
   if (VerifyActivationFrameSize) {
     Label L;
-#ifdef AARCH64
-    mov(Rtemp, SP);
-    sub(Rtemp, FP, Rtemp);
-#else
     sub(Rtemp, FP, SP);
-#endif // AARCH64
     int min_frame_size = (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * wordSize;
     cmp(Rtemp, min_frame_size);
     b(L, ge);
@@ -691,16 +587,10 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
     if (state == vtos) {
       indirect_jump(Address::indexed_ptr(RdispatchTable, R3_bytecode), Rtemp);
     } else {
-#ifdef AARCH64
-      sub(Rtemp, R3_bytecode, (Interpreter::distance_from_dispatch_table(vtos) -
-                           Interpreter::distance_from_dispatch_table(state)));
-      indirect_jump(Address::indexed_ptr(RdispatchTable, Rtemp), Rtemp);
-#else
       // on 32-bit ARM this method is faster than the one above.
       sub(Rtemp, RdispatchTable, (Interpreter::distance_from_dispatch_table(vtos) -
                            Interpreter::distance_from_dispatch_table(state)) * wordSize);
       indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
-#endif
     }
   } else {
     assert(table_mode == DispatchNormal, "invalid dispatch table mode");
@@ -896,25 +786,18 @@ void InterpreterMacroAssembler::remove_activation(TosState state, Register ret_a
                                  // points to word before bottom of monitor block
 
     cmp(Rcur, Rbottom);          // check if there are no monitors
-#ifndef AARCH64
     ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                  // prefetch monitor's object
-#endif // !AARCH64
     b(no_unlock, eq);
 
     bind(loop);
-#ifdef AARCH64
-    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
-#endif // AARCH64
     // check if current entry is used
     cbnz(Rcur_obj, exception_monitor_is_still_locked);
 
     add(Rcur, Rcur, entry_size);      // otherwise advance to next entry
     cmp(Rcur, Rbottom);               // check if bottom reached
-#ifndef AARCH64
     ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                       // prefetch monitor's object
-#endif // !AARCH64
     b(loop, ne);                      // if not at bottom then check this entry
   }
 
@@ -928,15 +811,9 @@ void InterpreterMacroAssembler::remove_activation(TosState state, Register ret_a
   }
 
   // remove activation
-#ifdef AARCH64
-  ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
-  ldp(FP, LR, Address(FP));
-  mov(SP, Rtemp);
-#else
   mov(Rtemp, FP);
   ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
   ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize));
-#endif
 
   if (ret_addr != LR) {
     mov(ret_addr, LR);
@@ -964,7 +841,7 @@ void InterpreterMacroAssembler::set_do_not_unlock_if_synchronized(bool flag, Reg
 //
 // Argument: R1 : Points to BasicObjectLock to be used for locking.
 // Must be initialized with object to lock.
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
+// Blows volatile registers R0-R3, Rtemp, LR. Calls VM.
 void InterpreterMacroAssembler::lock_object(Register Rlock) {
   assert(Rlock == R1, "the second argument");
 
@@ -990,15 +867,6 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {
       biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case);
     }
 
-#ifdef AARCH64
-    assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
-    ldr(Rmark, Robj);
-
-    // Test if object is already locked
-    assert(markOopDesc::unlocked_value == 1, "adjust this code");
-    tbz(Rmark, exact_log2(markOopDesc::unlocked_value), already_locked);
-
-#else // AARCH64
 
     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
     // That would be acceptable as ether CAS or slow case path is taken in that case.
@@ -1012,7 +880,6 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {
     tst(Rmark, markOopDesc::unlocked_value);
     b(already_locked, eq);
 
-#endif // !AARCH64
     // Save old object->mark() into BasicLock's displaced header
     str(Rmark, Address(Rlock, mark_offset));
 
@@ -1058,19 +925,6 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {
     // conditions into a single test:
     // => ((mark - SP) & (3 - os::pagesize())) == 0
 
-#ifdef AARCH64
-    // Use the single check since the immediate is OK for AARCH64
-    sub(R0, Rmark, Rstack_top);
-    intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
-    Assembler::LogicalImmediate imm(mask, false);
-    ands(R0, R0, imm);
-
-    // For recursive case store 0 into lock record.
-    // It is harmless to store it unconditionally as lock record contains some garbage
-    // value in its _displaced_header field by this moment.
-    str(ZR, Address(Rlock, mark_offset));
-
-#else // AARCH64
     // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand.
     // Check independently the low bits and the distance to SP.
     // -1- test low 2 bits
@@ -1081,7 +935,6 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {
     // If still 'eq' then recursive locking OK: store 0 into lock record
     str(R0, Address(Rlock, mark_offset), eq);
 
-#endif // AARCH64
 
 #ifndef PRODUCT
     if (PrintBiasedLockingStatistics) {
@@ -1105,7 +958,7 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {
 //
 // Argument: R1: Points to BasicObjectLock structure for lock
 // Throw an IllegalMonitorException if object is not locked by current thread
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
+// Blows volatile registers R0-R3, Rtemp, LR. Calls VM.
 void InterpreterMacroAssembler::unlock_object(Register Rlock) {
   assert(Rlock == R1, "the second argument");
 
@@ -1167,7 +1020,7 @@ void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& ze
 
 
 // Set the method data pointer for the current bcp.
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR.
+// Blows volatile registers R0-R3, Rtemp, LR.
 void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
   assert(ProfileInterpreter, "must be profiling interpreter");
   Label set_mdp;
@@ -1264,22 +1117,12 @@ void InterpreterMacroAssembler::increment_mdp_data_at(Address data,
     // Decrement the register. Set condition codes.
     subs(bumped_count, bumped_count, DataLayout::counter_increment);
     // Avoid overflow.
-#ifdef AARCH64
-    assert(DataLayout::counter_increment == 1, "required for cinc");
-    cinc(bumped_count, bumped_count, pl);
-#else
     add(bumped_count, bumped_count, DataLayout::counter_increment, pl);
-#endif // AARCH64
   } else {
     // Increment the register. Set condition codes.
     adds(bumped_count, bumped_count, DataLayout::counter_increment);
     // Avoid overflow.
-#ifdef AARCH64
-    assert(DataLayout::counter_increment == 1, "required for cinv");
-    cinv(bumped_count, bumped_count, mi); // inverts 0x80..00 back to 0x7f..ff
-#else
     sub(bumped_count, bumped_count, DataLayout::counter_increment, mi);
-#endif // AARCH64
   }
   str(bumped_count, data);
 }
@@ -1327,7 +1170,7 @@ void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int cons
 }
 
 
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Blows volatile registers R0-R3, Rtemp, LR.
 void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
   assert(ProfileInterpreter, "must be profiling interpreter");
   assert_different_registers(return_bci, R0, R1, R2, R3, Rtemp);
@@ -1541,7 +1384,7 @@ void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
   bind (done);
 }
 
-// Sets mdp, blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Sets mdp, blows volatile registers R0-R3, Rtemp, LR.
 void InterpreterMacroAssembler::profile_ret(Register mdp, Register return_bci) {
   assert_different_registers(mdp, return_bci, Rtemp, R0, R1, R2, R3);
 
@@ -1703,9 +1546,6 @@ void InterpreterMacroAssembler::profile_switch_case(Register mdp, Register index
 
 
 void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Register rtmp2) {
-#ifdef AARCH64
-  rev_w(r, r);
-#else
   if (VM_Version::supports_rev()) {
     rev(r, r);
   } else {
@@ -1714,7 +1554,6 @@ void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Registe
     andr(rtmp1, rtmp2, AsmOperand(rtmp1, lsr, 8));
     eor(r, rtmp1, AsmOperand(r, ror, 8));
   }
-#endif // AARCH64
 }
 
 
@@ -1722,7 +1561,7 @@ void InterpreterMacroAssembler::inc_global_counter(address address_of_counter, i
   const intx addr = (intx) (address_of_counter + offset);
 
   assert ((addr & 0x3) == 0, "address of counter should be aligned");
-  const intx offset_mask = right_n_bits(AARCH64_ONLY(12 + 2) NOT_AARCH64(12));
+  const intx offset_mask = right_n_bits(12);
 
   const address base = (address) (addr & ~offset_mask);
   const int offs = (int) (addr & offset_mask);
@@ -1735,14 +1574,7 @@ void InterpreterMacroAssembler::inc_global_counter(address address_of_counter, i
 
   if (avoid_overflow) {
     adds_32(val, val, 1);
-#ifdef AARCH64
-    Label L;
-    b(L, mi);
-    str_32(val, Address(addr_base, offs));
-    bind(L);
-#else
     str(val, Address(addr_base, offs), pl);
-#endif // AARCH64
   } else {
     add_32(val, val, 1);
     str_32(val, Address(addr_base, offs));
@@ -1822,17 +1654,9 @@ void InterpreterMacroAssembler::notify_method_exit(
     if (native) {
       // For c++ and template interpreter push both result registers on the
       // stack in native, we don't know the state.
-      // On AArch64 result registers are stored into the frame at known locations.
       // See frame::interpreter_frame_result for code that gets the result values from here.
       assert(result_lo != noreg, "result registers should be defined");
 
-#ifdef AARCH64
-      assert(result_hi == noreg, "result_hi is not used on AArch64");
-      assert(result_fp != fnoreg, "FP result register must be defined");
-
-      str_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
-      str(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
-#else
       assert(result_hi != noreg, "result registers should be defined");
 
 #ifdef __ABI_HARD__
@@ -1842,20 +1666,14 @@ void InterpreterMacroAssembler::notify_method_exit(
 #endif // __ABI_HARD__
 
       push(RegisterSet(result_lo) | RegisterSet(result_hi));
-#endif // AARCH64
 
       call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
 
-#ifdef AARCH64
-      ldr_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
-      ldr(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
-#else
       pop(RegisterSet(result_lo) | RegisterSet(result_hi));
 #ifdef __ABI_HARD__
       fldd(result_fp, Address(SP));
       add(SP, SP, 2 * wordSize);
 #endif // __ABI_HARD__
-#endif // AARCH64
 
     } else {
       // For the template interpreter, the value on tos is the size of the
@@ -1931,13 +1749,8 @@ void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
   add(scratch, scratch, increment);
   str_32(scratch, counter_addr);
 
-#ifdef AARCH64
-  ldr_u32(scratch2, mask_addr);
-  ands_w(ZR, scratch, scratch2);
-#else
   ldr(scratch2, mask_addr);
   andrs(scratch, scratch, scratch2);
-#endif // AARCH64
   b(*where, cond);
 }
 
@@ -1958,26 +1771,15 @@ void InterpreterMacroAssembler::get_method_counters(Register method,
     // Save and restore in use caller-saved registers since they will be trashed by call_VM
     assert(reg1 != noreg, "must specify reg1");
     assert(reg2 != noreg, "must specify reg2");
-#ifdef AARCH64
-    assert(reg3 != noreg, "must specify reg3");
-    stp(reg1, reg2, Address(Rstack_top, -2*wordSize, pre_indexed));
-    stp(reg3, ZR, Address(Rstack_top, -2*wordSize, pre_indexed));
-#else
     assert(reg3 == noreg, "must not specify reg3");
     push(RegisterSet(reg1) | RegisterSet(reg2));
-#endif
   }
 
   mov(R1, method);
   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), R1);
 
   if (saveRegs) {
-#ifdef AARCH64
-    ldp(reg3, ZR, Address(Rstack_top, 2*wordSize, post_indexed));
-    ldp(reg1, reg2, Address(Rstack_top, 2*wordSize, post_indexed));
-#else
     pop(RegisterSet(reg1) | RegisterSet(reg2));
-#endif
   }
 
   ldr(Rcounters, method_counters);
diff --git a/src/hotspot/cpu/arm/interp_masm_arm.hpp b/src/hotspot/cpu/arm/interp_masm_arm.hpp
index 70d694ae494..3b72f29f5fc 100644
--- a/src/hotspot/cpu/arm/interp_masm_arm.hpp
+++ b/src/hotspot/cpu/arm/interp_masm_arm.hpp
@@ -63,48 +63,12 @@ class InterpreterMacroAssembler: public MacroAssembler {
   virtual void check_and_handle_earlyret();
 
   // Interpreter-specific registers
-#if defined(AARCH64) && defined(ASSERT)
-
-#define check_stack_top()               _check_stack_top("invalid Rstack_top at " __FILE__ ":" XSTR(__LINE__))
-#define check_stack_top_on_expansion()  _check_stack_top("invalid Rstack_top at " __FILE__ ":" XSTR(__LINE__), VerifyInterpreterStackTop)
-#define check_extended_sp(tmp)          _check_extended_sp(tmp, "SP does not match extended SP in frame at " __FILE__ ":" XSTR(__LINE__))
-#define check_no_cached_stack_top(tmp)  _check_no_cached_stack_top(tmp, "stack_top is already cached in frame at " __FILE__ ":" XSTR(__LINE__))
-
-  void _check_stack_top(const char* msg, bool enabled = true) {
-      if (enabled) {
-          Label L;
-          cmp(SP, Rstack_top);
-          b(L, ls);
-          stop(msg);
-          bind(L);
-      }
-  }
-
-  void _check_extended_sp(Register tmp, const char* msg) {
-      Label L;
-      ldr(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
-      cmp(SP, tmp);
-      b(L, eq);
-      stop(msg);
-      bind(L);
-  }
-
-  void _check_no_cached_stack_top(Register tmp, const char* msg) {
-      Label L;
-      ldr(tmp, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
-      cbz(tmp, L);
-      stop(msg);
-      bind(L);
-  }
-
-#else
 
   inline void check_stack_top() {}
   inline void check_stack_top_on_expansion() {}
   inline void check_extended_sp(Register tmp) {}
   inline void check_no_cached_stack_top(Register tmp) {}
 
-#endif // AARCH64 && ASSERT
 
   void save_bcp()                                          { str(Rbcp, Address(FP, frame::interpreter_frame_bcp_offset * wordSize)); }
   void restore_bcp()                                       { ldr(Rbcp, Address(FP, frame::interpreter_frame_bcp_offset * wordSize)); }
@@ -112,13 +76,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
   void restore_method()                                    { ldr(Rmethod, Address(FP, frame::interpreter_frame_method_offset * wordSize)); }
   void restore_dispatch();
 
-#ifdef AARCH64
-  void save_stack_top()                                    { check_stack_top(); str(Rstack_top, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); }
-  void clear_cached_stack_top()                            { str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); }
-  void restore_stack_top()                                 { ldr(Rstack_top, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); clear_cached_stack_top(); check_stack_top(); }
-  void cut_sp_before_call()                                { align_reg(SP, Rstack_top, StackAlignmentInBytes); }
-  void restore_sp_after_call(Register tmp)                 { ldr(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); mov(SP, tmp); }
-#endif
 
   // Helpers for runtime call arguments/results
   void get_const(Register reg)                             { ldr(reg, Address(Rmethod, Method::const_offset())); }
@@ -145,21 +102,13 @@ class InterpreterMacroAssembler: public MacroAssembler {
 
   void pop_ptr(Register r);
   void pop_i(Register r = R0_tos);
-#ifdef AARCH64
-  void pop_l(Register r = R0_tos);
-#else
   void pop_l(Register lo = R0_tos_lo, Register hi = R1_tos_hi);
-#endif
   void pop_f(FloatRegister fd);
   void pop_d(FloatRegister fd);
 
   void push_ptr(Register r);
   void push_i(Register r = R0_tos);
-#ifdef AARCH64
-  void push_l(Register r = R0_tos);
-#else
   void push_l(Register lo = R0_tos_lo, Register hi = R1_tos_hi);
-#endif
   void push_f();
   void push_d();
 
@@ -168,7 +117,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
   // Transition state -> vtos. Blows Rtemp.
   void push(TosState state);
 
-#ifndef AARCH64
   // The following methods are overridden to allow overloaded calls to
   //   MacroAssembler::push/pop(Register)
   //   MacroAssembler::push/pop(RegisterSet)
@@ -183,7 +131,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
   void convert_retval_to_tos(TosState state);
   // Converts TOS cached value to return value in R0/R1 (according to interpreter calling conventions).
   void convert_tos_to_retval(TosState state);
-#endif
 
   // JVMTI ForceEarlyReturn support
   void load_earlyret_value(TosState state);
@@ -194,12 +141,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
   void empty_expression_stack() {
       ldr(Rstack_top, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
       check_stack_top();
-#ifdef AARCH64
-      clear_cached_stack_top();
-#else
       // NULL last_sp until next java call
       str(zero_register(Rtemp), Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
-#endif // AARCH64
   }
 
   // Helpers for swap and dup
diff --git a/src/hotspot/cpu/arm/interpreterRT_arm.cpp b/src/hotspot/cpu/arm/interpreterRT_arm.cpp
index a3eb57e16bf..0d38d93ad2b 100644
--- a/src/hotspot/cpu/arm/interpreterRT_arm.cpp
+++ b/src/hotspot/cpu/arm/interpreterRT_arm.cpp
@@ -44,13 +44,9 @@ InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(
   _abi_offset = 0;
   _ireg = is_static() ? 2 : 1;
 #ifdef __ABI_HARD__
-#ifdef AARCH64
-  _freg = 0;
-#else
   _fp_slot = 0;
   _single_fpr_slot = 0;
 #endif
-#endif
 }
 
 #ifdef SHARING_FAST_NATIVE_FINGERPRINTS
@@ -127,17 +123,6 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
 }
 
 void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
-#ifdef AARCH64
-  if (_ireg < GPR_PARAMS) {
-    Register dst = as_Register(_ireg);
-    __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1)));
-    _ireg++;
-  } else {
-    __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1)));
-    __ str(Rtemp, Address(SP, _abi_offset * wordSize));
-    _abi_offset++;
-  }
-#else
   if (_ireg <= 2) {
 #if (ALIGN_WIDE_ARGUMENTS == 1)
     if ((_ireg & 1) != 0) {
@@ -171,24 +156,9 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
     _abi_offset += 2;
     _ireg = 4;
   }
-#endif // AARCH64
 }
 
 void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
-#ifdef AARCH64
-  __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
-  __ cmp(Rtemp, 0);
-  __ sub(Rtemp, Rlocals, -Interpreter::local_offset_in_bytes(offset()));
-  if (_ireg < GPR_PARAMS) {
-    Register dst = as_Register(_ireg);
-    __ csel(dst, ZR, Rtemp, eq);
-    _ireg++;
-  } else {
-    __ csel(Rtemp, ZR, Rtemp, eq);
-    __ str(Rtemp, Address(SP, _abi_offset * wordSize));
-    _abi_offset++;
-  }
-#else
   if (_ireg < 4) {
     Register dst = as_Register(_ireg);
     __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
@@ -202,7 +172,6 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
     __ str(Rtemp, Address(SP, _abi_offset * wordSize));
     _abi_offset++;
   }
-#endif // AARCH64
 }
 
 #ifndef __ABI_HARD__
@@ -221,17 +190,6 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
 #else
 #ifndef __SOFTFP__
 void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
-#ifdef AARCH64
-    if (_freg < FPR_PARAMS) {
-      FloatRegister dst = as_FloatRegister(_freg);
-      __ ldr_s(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
-      _freg++;
-    } else {
-      __ ldr_u32(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset())));
-      __ str_32(Rtemp, Address(SP, _abi_offset * wordSize));
-      _abi_offset++;
-    }
-#else
     if((_fp_slot < 16) || (_single_fpr_slot & 1)) {
       if ((_single_fpr_slot & 1) == 0) {
         _single_fpr_slot = _fp_slot;
@@ -244,21 +202,9 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
       __ str(Rtemp, Address(SP, _abi_offset * wordSize));
       _abi_offset++;
     }
-#endif // AARCH64
 }
 
 void InterpreterRuntime::SignatureHandlerGenerator::pass_double() {
-#ifdef AARCH64
-    if (_freg < FPR_PARAMS) {
-      FloatRegister dst = as_FloatRegister(_freg);
-      __ ldr_d(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1)));
-      _freg++;
-    } else {
-      __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1)));
-      __ str(Rtemp, Address(SP, _abi_offset * wordSize));
-      _abi_offset++;
-    }
-#else
     if(_fp_slot <= 14) {
       __ fldd(as_FloatRegister(_fp_slot), Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1)));
       _fp_slot += 2;
@@ -270,7 +216,6 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_double() {
       _abi_offset += 2;
       _single_fpr_slot = 16;
     }
-#endif // AARCH64
 }
 #endif // __SOFTFP__
 #endif // __ABI_HARD__
@@ -333,9 +278,7 @@ class SlowSignatureHandler: public NativeSignatureIterator {
   intptr_t* _toGP;
   int       _last_gp;
   int       _last_fp;
-#ifndef AARCH64
   int       _last_single_fp;
-#endif // !AARCH64
 
   virtual void pass_int() {
     if(_last_gp < GPR_PARAMS) {
@@ -347,13 +290,6 @@ class SlowSignatureHandler: public NativeSignatureIterator {
   }
 
   virtual void pass_long() {
-#ifdef AARCH64
-    if(_last_gp < GPR_PARAMS) {
-      _toGP[_last_gp++] = *(jlong *)(_from+Interpreter::local_offset_in_bytes(1));
-    } else {
-      *_to++ = *(jlong *)(_from+Interpreter::local_offset_in_bytes(1));
-    }
-#else
     assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
     if (_last_gp <= 2) {
       if(_last_gp & 1) _last_gp++;
@@ -369,7 +305,6 @@ class SlowSignatureHandler: public NativeSignatureIterator {
       _to += 2;
       _last_gp = 4;
     }
-#endif // AARCH64
     _from -= 2*Interpreter::stackElementSize;
   }
 
@@ -384,13 +319,6 @@ class SlowSignatureHandler: public NativeSignatureIterator {
   }
 
   virtual void pass_float() {
-#ifdef AARCH64
-    if(_last_fp < FPR_PARAMS) {
-      _toFP[_last_fp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
-    } else {
-      *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
-    }
-#else
     if((_last_fp < 16) || (_last_single_fp & 1)) {
       if ((_last_single_fp & 1) == 0) {
         _last_single_fp = _last_fp;
@@ -401,18 +329,10 @@ class SlowSignatureHandler: public NativeSignatureIterator {
     } else {
       *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
     }
-#endif // AARCH64
     _from -= Interpreter::stackElementSize;
   }
 
   virtual void pass_double() {
-#ifdef AARCH64
-    if(_last_fp < FPR_PARAMS) {
-      _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
-    } else {
-      *_to++ = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
-    }
-#else
     assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
     if(_last_fp <= 14) {
       _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
@@ -426,7 +346,6 @@ class SlowSignatureHandler: public NativeSignatureIterator {
       _to += 2;
       _last_single_fp = 16;
     }
-#endif // AARCH64
     _from -= 2*Interpreter::stackElementSize;
   }
 
@@ -440,12 +359,10 @@ class SlowSignatureHandler: public NativeSignatureIterator {
 #ifdef __ABI_HARD__
     _toGP  = to;
     _toFP = _toGP + GPR_PARAMS;
-    _to   = _toFP + AARCH64_ONLY(FPR_PARAMS) NOT_AARCH64(8*2);
+    _to   = _toFP + (8*2);
     _last_gp = (is_static() ? 2 : 1);
     _last_fp = 0;
-#ifndef AARCH64
     _last_single_fp = 0;
-#endif // !AARCH64
 #else
     _to   = to + (is_static() ? 2 : 1);
 #endif // __ABI_HARD__
diff --git a/src/hotspot/cpu/arm/interpreterRT_arm.hpp b/src/hotspot/cpu/arm/interpreterRT_arm.hpp
index 0c04a0c25cc..fc75c79bb29 100644
--- a/src/hotspot/cpu/arm/interpreterRT_arm.hpp
+++ b/src/hotspot/cpu/arm/interpreterRT_arm.hpp
@@ -34,12 +34,8 @@ class SignatureHandlerGenerator: public NativeSignatureIterator {
   int  _ireg;
 
 #ifdef __ABI_HARD__
-#ifdef AARCH64
-  int _freg;
-#else
   int _fp_slot; // number of FPR's with arguments loaded
   int _single_fpr_slot;
-#endif
 #endif
 
   void move(int from_offset, int to_offset);
@@ -60,10 +56,8 @@ class SignatureHandlerGenerator: public NativeSignatureIterator {
   void generate(uint64_t fingerprint);
 };
 
-#ifndef AARCH64
 // ARM provides a normalized fingerprint for native calls (to increase
 // sharing). See normalize_fast_native_fingerprint
 #define SHARING_FAST_NATIVE_FINGERPRINTS
-#endif
 
 #endif // CPU_ARM_VM_INTERPRETERRT_ARM_HPP
diff --git a/src/hotspot/cpu/arm/jniFastGetField_arm.cpp b/src/hotspot/cpu/arm/jniFastGetField_arm.cpp
index 65f929b1025..6cab074c4c8 100644
--- a/src/hotspot/cpu/arm/jniFastGetField_arm.cpp
+++ b/src/hotspot/cpu/arm/jniFastGetField_arm.cpp
@@ -78,26 +78,19 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
   // R1 - object handle
   // R2 - jfieldID
 
-  const Register Rsafepoint_counter_addr = AARCH64_ONLY(R4) NOT_AARCH64(R3);
-  const Register Robj = AARCH64_ONLY(R5) NOT_AARCH64(R1);
-  const Register Rres = AARCH64_ONLY(R6) NOT_AARCH64(R0);
-#ifndef AARCH64
+  const Register Rsafepoint_counter_addr = R3;
+  const Register Robj = R1;
+  const Register Rres = R0;
   const Register Rres_hi = R1;
-#endif // !AARCH64
   const Register Rsafept_cnt = Rtemp;
   const Register Rsafept_cnt2 = Rsafepoint_counter_addr;
-  const Register Rtmp1 = AARCH64_ONLY(R7) NOT_AARCH64(R3); // same as Rsafepoint_counter_addr on 32-bit ARM
-  const Register Rtmp2 = AARCH64_ONLY(R8) NOT_AARCH64(R2); // same as jfieldID on 32-bit ARM
+  const Register Rtmp1 = R3; // same as Rsafepoint_counter_addr
+  const Register Rtmp2 = R2; // same as jfieldID
 
-#ifdef AARCH64
-  assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Robj, Rres, Rtmp1, Rtmp2, R0, R1, R2, LR);
-  assert_different_registers(Rsafept_cnt2, Rsafept_cnt, Rres, R0, R1, R2, LR);
-#else
   assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Robj, Rres, LR);
   assert_different_registers(Rsafept_cnt, R1, R2, Rtmp1, LR);
   assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Rres, Rres_hi, Rtmp2, LR);
   assert_different_registers(Rsafept_cnt2, Rsafept_cnt, Rres, Rres_hi, LR);
-#endif // AARCH64
 
   address fast_entry;
 
@@ -112,20 +105,12 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
   Label slow_case;
   __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr);
 
-#ifndef AARCH64
   __ push(RegisterSet(R0, R3));  // save incoming arguments for slow case
-#endif // !AARCH64
 
   __ ldr_s32(Rsafept_cnt, Address(Rsafepoint_counter_addr));
   __ tbnz(Rsafept_cnt, 0, slow_case);
 
-#ifdef AARCH64
-  // If mask changes we need to ensure that the inverse is still encodable as an immediate
-  STATIC_ASSERT(JNIHandles::weak_tag_mask == 1);
-  __ andr(R1, R1, ~JNIHandles::weak_tag_mask);
-#else
   __ bic(R1, R1, JNIHandles::weak_tag_mask);
-#endif
 
   if (os::is_MP()) {
     // Address dependency restricts memory access ordering. It's cheaper than explicit LoadLoad barrier
@@ -135,10 +120,6 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
     __ ldr(Robj, Address(R1));
   }
 
-#ifdef AARCH64
-  __ add(Robj, Robj, AsmOperand(R2, lsr, 2));
-  Address field_addr = Address(Robj);
-#else
   Address field_addr;
   if (type != T_BOOLEAN
       && type != T_INT
@@ -152,7 +133,6 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
   } else {
     field_addr = Address(Robj, R2, lsr, 2);
   }
-#endif // AARCH64
   assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
   speculative_load_pclist[count] = __ pc();
 
@@ -179,12 +159,8 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
 #ifndef __ABI_HARD__
     case T_DOUBLE:
 #endif
-#ifdef AARCH64
-      __ ldr(Rres, field_addr);
-#else
       // Safe to use ldrd since long and double fields are 8-byte aligned
       __ ldrd(Rres, field_addr);
-#endif // AARCH64
       break;
 #ifdef __ABI_HARD__
     case T_FLOAT:
@@ -200,18 +176,16 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
 
   if(os::is_MP()) {
       // Address dependency restricts memory access ordering. It's cheaper than explicit LoadLoad barrier
-#if defined(__ABI_HARD__) && !defined(AARCH64)
+#if defined(__ABI_HARD__)
     if (type == T_FLOAT || type == T_DOUBLE) {
       __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr);
       __ fmrrd(Rres, Rres_hi, D0);
       __ eor(Rtmp2, Rres, Rres);
       __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr, Rtmp2));
     } else
-#endif // __ABI_HARD__ && !AARCH64
+#endif // __ABI_HARD__
     {
-#ifndef AARCH64
       __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr);
-#endif // !AARCH64
       __ eor(Rtmp2, Rres, Rres);
       __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr, Rtmp2));
     }
@@ -219,22 +193,14 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
     __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr));
   }
   __ cmp(Rsafept_cnt2, Rsafept_cnt);
-#ifdef AARCH64
-  __ b(slow_case, ne);
-  __ mov(R0, Rres);
-  __ ret();
-#else
   // discards saved R0 R1 R2 R3
   __ add(SP, SP, 4 * wordSize, eq);
   __ bx(LR, eq);
-#endif // AARCH64
 
   slowcase_entry_pclist[count++] = __ pc();
 
   __ bind(slow_case);
-#ifndef AARCH64
   __ pop(RegisterSet(R0, R3));
-#endif // !AARCH64
   // thumb mode switch handled by MacroAssembler::jump if needed
   __ jump(slow_case_addr, relocInfo::none, Rtemp);
 
diff --git a/src/hotspot/cpu/arm/jniTypes_arm.hpp b/src/hotspot/cpu/arm/jniTypes_arm.hpp
index 636f8e81a2c..4c5d7739fd4 100644
--- a/src/hotspot/cpu/arm/jniTypes_arm.hpp
+++ b/src/hotspot/cpu/arm/jniTypes_arm.hpp
@@ -44,12 +44,10 @@ class JNITypes : AllStatic {
 
 private:
 
-#ifndef AARCH64
   // 32bit Helper routines.
   static inline void put_int2r(jint *from, intptr_t *to)           { *(jint *)(to++) = from[1];
                                                                         *(jint *)(to  ) = from[0]; }
   static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; }
-#endif
 
 public:
   // Ints are stored in native format in one JavaCallArgument slot at *to.
@@ -57,18 +55,11 @@ class JNITypes : AllStatic {
   static inline void put_int(jint  from, intptr_t *to, int& pos) { *(jint *)(to + pos++) =  from; }
   static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; }
 
-#ifdef AARCH64
-  // Longs are stored in native format in one JavaCallArgument slot at *(to+1).
-  static inline void put_long(jlong  from, intptr_t *to)           { *(jlong *)(to + 1 +   0) =  from; }
-  static inline void put_long(jlong  from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) =  from; pos += 2; }
-  static inline void put_long(jlong *from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) = *from; pos += 2; }
-#else
   // Longs are stored in big-endian word format in two JavaCallArgument slots at *to.
   // The high half is in *to and the low half in *(to+1).
   static inline void put_long(jlong  from, intptr_t *to)           { put_int2r((jint *)&from, to); }
   static inline void put_long(jlong  from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); }
   static inline void put_long(jlong *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); }
-#endif
 
   // Oops are stored in native format in one JavaCallArgument slot at *to.
   static inline void put_obj(oop  from, intptr_t *to)           { *(oop *)(to +   0  ) =  from; }
@@ -80,18 +71,11 @@ class JNITypes : AllStatic {
   static inline void put_float(jfloat  from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) =  from; }
   static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
 
-#ifdef AARCH64
-  // Doubles are stored in native word format in one JavaCallArgument slot at *(to+1).
-  static inline void put_double(jdouble  from, intptr_t *to)           { *(jdouble *)(to + 1 +   0) =  from; }
-  static inline void put_double(jdouble  from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) =  from; pos += 2; }
-  static inline void put_double(jdouble *from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) = *from; pos += 2; }
-#else
   // Doubles are stored in big-endian word format in two JavaCallArgument slots at *to.
   // The high half is in *to and the low half in *(to+1).
   static inline void put_double(jdouble  from, intptr_t *to)           { put_int2r((jint *)&from, to); }
   static inline void put_double(jdouble  from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); }
   static inline void put_double(jdouble *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); }
-#endif
 
 };
 
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.cpp b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
index a3f75fa0e2b..54065416332 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
@@ -97,19 +97,6 @@ RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_ad
 }
 
 
-#ifdef AARCH64
-// Note: ARM32 version is OS dependent
-void MacroAssembler::breakpoint(AsmCondition cond) {
-  if (cond == al) {
-    brk();
-  } else {
-    Label L;
-    b(L, inverse(cond));
-    brk();
-    bind(L);
-  }
-}
-#endif // AARCH64
 
 
 // virtual method calling
@@ -210,9 +197,6 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                    Label* L_success,
                                                    Label* L_failure,
                                                    bool set_cond_codes) {
-#ifdef AARCH64
-  NOT_IMPLEMENTED();
-#else
   // Note: if used by code that expects a register to be 0 on success,
   // this register must be temp_reg and set_cond_codes must be true
 
@@ -313,7 +297,6 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
   }
 
   bind(L_fallthrough);
-#endif
 }
 
 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
@@ -342,14 +325,9 @@ int MacroAssembler::set_last_Java_frame(Register last_java_sp,
   } else {
     _fp_saved = false;
   }
-  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
-#ifdef AARCH64
-    pc_offset = mov_pc_to(tmp);
-    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
-#else
+  if (save_last_java_pc) {
     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
     pc_offset = offset() + VM_Version::stored_pc_adjustment();
-#endif
     _pc_saved = true;
   } else {
     _pc_saved = false;
@@ -369,16 +347,7 @@ int MacroAssembler::set_last_Java_frame(Register last_java_sp,
   if (last_java_sp == noreg) {
     last_java_sp = SP; // always saved
   }
-#ifdef AARCH64
-  if (last_java_sp == SP) {
-    mov(tmp, SP);
-    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
-  } else {
-    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
-  }
-#else
   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
-#endif
 
   return pc_offset; // for oopmaps
 }
@@ -401,19 +370,15 @@ void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_argu
   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
 
-#ifndef AARCH64
   // Safer to save R9 here since callers may have been written
   // assuming R9 survives. This is suboptimal but is not worth
   // optimizing for the few platforms where R9 is scratched.
   push(RegisterSet(R4) | R9ifScratched);
   mov(R4, SP);
   bic(SP, SP, StackAlignmentInBytes - 1);
-#endif // AARCH64
   call(entry_point, relocInfo::runtime_call_type);
-#ifndef AARCH64
   mov(SP, R4);
   pop(RegisterSet(R4) | R9ifScratched);
-#endif // AARCH64
 }
 
 
@@ -426,11 +391,6 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in
 
   set_last_Java_frame(SP, FP, true, tmp);
 
-#ifdef ASSERT
-  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
-#endif // ASSERT
-
-#ifndef AARCH64
 #if R9_IS_SCRATCHED
   // Safer to save R9 here since callers may have been written
   // assuming R9 survives. This is suboptimal but is not worth
@@ -446,17 +406,14 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in
 #else
   bic(SP, SP, StackAlignmentInBytes - 1);
 #endif // R9_IS_SCRATCHED
-#endif
 
   mov(R0, Rthread);
   call(entry_point, relocInfo::runtime_call_type);
 
-#ifndef AARCH64
 #if R9_IS_SCRATCHED
   ldr(R9, Address(SP, 0));
 #endif
   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
-#endif
 
   reset_last_Java_frame(tmp);
 
@@ -467,17 +424,9 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in
   if (check_exceptions) {
     // check for pending exceptions
     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
-#ifdef AARCH64
-    Label L;
-    cbz(tmp, L);
-    mov_pc_to(Rexception_pc);
-    b(StubRoutines::forward_exception_entry());
-    bind(L);
-#else
     cmp(tmp, 0);
     mov(Rexception_pc, PC, ne);
     b(StubRoutines::forward_exception_entry(), ne);
-#endif // AARCH64
   }
 
   // get oop result if there is one and reset the value in the thread
@@ -608,32 +557,6 @@ void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2
 }
 
 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
-#ifdef AARCH64
-  if (c == 0) {
-    if (rd != rn) {
-      mov(rd, rn);
-    }
-    return;
-  }
-  if (c < 0) {
-    sub_slow(rd, rn, -c);
-    return;
-  }
-  if (c > right_n_bits(24)) {
-    guarantee(rd != rn, "no large add_slow with only one register");
-    mov_slow(rd, c);
-    add(rd, rn, rd);
-  } else {
-    int lo = c & right_n_bits(12);
-    int hi = (c >> 12) & right_n_bits(12);
-    if (lo != 0) {
-      add(rd, rn, lo, lsl0);
-    }
-    if (hi != 0) {
-      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
-    }
-  }
-#else
   // This function is used in compiler for handling large frame offsets
   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
     return sub(rd, rn, (-c));
@@ -650,30 +573,9 @@ void MacroAssembler::add_slow(Register rd, Register rn, int c) {
     assert(c == 0, "");
     mov(rd, rn); // need to generate at least one move!
   }
-#endif // AARCH64
 }
 
 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
-#ifdef AARCH64
-  if (c <= 0) {
-    add_slow(rd, rn, -c);
-    return;
-  }
-  if (c > right_n_bits(24)) {
-    guarantee(rd != rn, "no large sub_slow with only one register");
-    mov_slow(rd, c);
-    sub(rd, rn, rd);
-  } else {
-    int lo = c & right_n_bits(12);
-    int hi = (c >> 12) & right_n_bits(12);
-    if (lo != 0) {
-      sub(rd, rn, lo, lsl0);
-    }
-    if (hi != 0) {
-      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
-    }
-  }
-#else
   // This function is used in compiler for handling large frame offsets
   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
     return add(rd, rn, (-c));
@@ -690,7 +592,6 @@ void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
     assert(c == 0, "");
     mov(rd, rn); // need to generate at least one move!
   }
-#endif // AARCH64
 }
 
 void MacroAssembler::mov_slow(Register rd, address addr) {
@@ -702,99 +603,6 @@ void MacroAssembler::mov_slow(Register rd, const char *str) {
   mov_slow(rd, (intptr_t)str);
 }
 
-#ifdef AARCH64
-
-// Common code for mov_slow and instr_count_for_mov_slow.
-// Returns number of instructions of mov_slow pattern,
-// generating it if non-null MacroAssembler is given.
-int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
-  // This code pattern is matched in NativeIntruction::is_mov_slow.
-  // Update it at modifications.
-
-  const intx mask = right_n_bits(16);
-  // 1 movz instruction
-  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
-    if ((c & ~(mask << base_shift)) == 0) {
-      if (masm != NULL) {
-        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
-      }
-      return 1;
-    }
-  }
-  // 1 movn instruction
-  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
-    if (((~c) & ~(mask << base_shift)) == 0) {
-      if (masm != NULL) {
-        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
-      }
-      return 1;
-    }
-  }
-  // 1 orr instruction
-  {
-    LogicalImmediate imm(c, false);
-    if (imm.is_encoded()) {
-      if (masm != NULL) {
-        masm->orr(rd, ZR, imm);
-      }
-      return 1;
-    }
-  }
-  // 1 movz/movn + up to 3 movk instructions
-  int zeroes = 0;
-  int ones = 0;
-  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
-    int part = (c >> base_shift) & mask;
-    if (part == 0) {
-      ++zeroes;
-    } else if (part == mask) {
-      ++ones;
-    }
-  }
-  int def_bits = 0;
-  if (ones > zeroes) {
-    def_bits = mask;
-  }
-  int inst_count = 0;
-  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
-    int part = (c >> base_shift) & mask;
-    if (part != def_bits) {
-      if (masm != NULL) {
-        if (inst_count > 0) {
-          masm->movk(rd, part, base_shift);
-        } else {
-          if (def_bits == 0) {
-            masm->movz(rd, part, base_shift);
-          } else {
-            masm->movn(rd, ~part & mask, base_shift);
-          }
-        }
-      }
-      inst_count++;
-    }
-  }
-  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
-  return inst_count;
-}
-
-void MacroAssembler::mov_slow(Register rd, intptr_t c) {
-#ifdef ASSERT
-  int off = offset();
-#endif
-  (void) mov_slow_helper(rd, c, this);
-  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
-}
-
-// Counts instructions generated by mov_slow(rd, c).
-int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
-  return mov_slow_helper(noreg, c, NULL);
-}
-
-int MacroAssembler::instr_count_for_mov_slow(address c) {
-  return mov_slow_helper(noreg, (intptr_t)c, NULL);
-}
-
-#else
 
 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
   if (AsmOperand::is_rotated_imm(c)) {
@@ -829,25 +637,13 @@ void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
   }
 }
 
-#endif // AARCH64
 
 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
-#ifdef AARCH64
-                             bool patchable
-#else
                              AsmCondition cond
-#endif
                              ) {
 
   if (o == NULL) {
-#ifdef AARCH64
-    if (patchable) {
-      nop();
-    }
-    mov(rd, ZR);
-#else
     mov(rd, 0, cond);
-#endif
     return;
   }
 
@@ -856,12 +652,6 @@ void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
   }
   relocate(oop_Relocation::spec(oop_index));
 
-#ifdef AARCH64
-  if (patchable) {
-    nop();
-  }
-  ldr(rd, pc());
-#else
   if (VM_Version::supports_movw()) {
     movw(rd, 0, cond);
     movt(rd, 0, cond);
@@ -870,16 +660,10 @@ void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
     nop();
   }
-#endif
 }
 
-void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
+void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index) {
   if (o == NULL) {
-#ifdef AARCH64
-    if (patchable) {
-      nop();
-    }
-#endif
     mov(rd, 0);
     return;
   }
@@ -889,18 +673,6 @@ void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index A
   }
   relocate(metadata_Relocation::spec(metadata_index));
 
-#ifdef AARCH64
-  if (patchable) {
-    nop();
-  }
-#ifdef COMPILER2
-  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
-    mov_slow(rd, (address)o);
-    return;
-  }
-#endif
-  ldr(rd, pc());
-#else
   if (VM_Version::supports_movw()) {
     movw(rd, ((int)o) & 0xffff);
     movt(rd, (unsigned int)o >> 16);
@@ -909,10 +681,9 @@ void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index A
     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
     nop();
   }
-#endif // AARCH64
 }
 
-void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
+void MacroAssembler::mov_float(FloatRegister fd, jfloat c, AsmCondition cond) {
   Label skip_constant;
   union {
     jfloat f;
@@ -920,23 +691,13 @@ void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCon
   } accessor;
   accessor.f = c;
 
-#ifdef AARCH64
-  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
-  Label L;
-  ldr_s(fd, target(L));
-  b(skip_constant);
-  bind(L);
-  emit_int32(accessor.i);
-  bind(skip_constant);
-#else
   flds(fd, Address(PC), cond);
   b(skip_constant);
   emit_int32(accessor.i);
   bind(skip_constant);
-#endif // AARCH64
 }
 
-void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
+void MacroAssembler::mov_double(FloatRegister fd, jdouble c, AsmCondition cond) {
   Label skip_constant;
   union {
     jdouble d;
@@ -944,55 +705,21 @@ void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmC
   } accessor;
   accessor.d = c;
 
-#ifdef AARCH64
-  // TODO-AARCH64 - try to optimize loading of double constants with fmov
-  Label L;
-  ldr_d(fd, target(L));
-  b(skip_constant);
-  align(wordSize);
-  bind(L);
-  emit_int32(accessor.i[0]);
-  emit_int32(accessor.i[1]);
-  bind(skip_constant);
-#else
   fldd(fd, Address(PC), cond);
   b(skip_constant);
   emit_int32(accessor.i[0]);
   emit_int32(accessor.i[1]);
   bind(skip_constant);
-#endif // AARCH64
 }
 
 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
   intptr_t addr = (intptr_t) address_of_global;
-#ifdef AARCH64
-  assert((addr & 0x3) == 0, "address should be aligned");
-
-  // FIXME: TODO
-  if (false && page_reachable_from_cache(address_of_global)) {
-    assert(false,"TODO: relocate");
-    //relocate();
-    adrp(reg, address_of_global);
-    ldrsw(reg, Address(reg, addr & 0xfff));
-  } else {
-    mov_slow(reg, addr & ~0x3fff);
-    ldrsw(reg, Address(reg, addr & 0x3fff));
-  }
-#else
   mov_slow(reg, addr & ~0xfff);
   ldr(reg, Address(reg, addr & 0xfff));
-#endif
 }
 
 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
-#ifdef AARCH64
-  intptr_t addr = (intptr_t) address_of_global;
-  assert ((addr & 0x7) == 0, "address should be aligned");
-  mov_slow(reg, addr & ~0x7fff);
-  ldr(reg, Address(reg, addr & 0x7fff));
-#else
   ldr_global_s32(reg, address_of_global);
-#endif
 }
 
 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
@@ -1002,14 +729,6 @@ void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
 }
 
 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
-#ifdef AARCH64
-  switch (bits) {
-    case  8: uxtb(rd, rn); break;
-    case 16: uxth(rd, rn); break;
-    case 32: mov_w(rd, rn); break;
-    default: ShouldNotReachHere();
-  }
-#else
   if (bits <= 8) {
     andr(rd, rn, (1 << bits) - 1);
   } else if (bits >= 24) {
@@ -1018,24 +737,13 @@ void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
     mov(rd, AsmOperand(rn, lsl, 32 - bits));
     mov(rd, AsmOperand(rd, lsr, 32 - bits));
   }
-#endif
 }
 
 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
-#ifdef AARCH64
-  switch (bits) {
-    case  8: sxtb(rd, rn); break;
-    case 16: sxth(rd, rn); break;
-    case 32: sxtw(rd, rn); break;
-    default: ShouldNotReachHere();
-  }
-#else
   mov(rd, AsmOperand(rn, lsl, 32 - bits));
   mov(rd, AsmOperand(rd, asr, 32 - bits));
-#endif
 }
 
-#ifndef AARCH64
 
 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
@@ -1129,7 +837,6 @@ void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
     }
   }
 }
-#endif // !AARCH64
 
 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
   // This code pattern is matched in NativeIntruction::skip_verify_oop.
@@ -1231,18 +938,11 @@ void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* f
 
 void MacroAssembler::c2bool(Register x) {
   tst(x, 0xff);   // Only look at the lowest byte
-#ifdef AARCH64
-  cset(x, ne);
-#else
   mov(x, 1, ne);
-#endif
 }
 
 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
   if (needs_explicit_null_check(offset)) {
-#ifdef AARCH64
-    ldr(ZR, Address(reg));
-#else
     assert_different_registers(reg, tmp);
     if (tmp == noreg) {
       tmp = Rtemp;
@@ -1253,7 +953,6 @@ void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
       // XXX: could we mark the code buffer as not compatible with C2 ?
     }
     ldr(tmp, Address(reg));
-#endif
   }
 }
 
@@ -1276,7 +975,7 @@ void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1
     assert_different_registers(obj, obj_end, top_addr, heap_end);
   }
 
-  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
+  bool load_const = VM_Version::supports_movw();
   if (load_const) {
     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
   } else {
@@ -1286,11 +985,7 @@ void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1
   Label retry;
   bind(retry);
 
-#ifdef AARCH64
-  ldxr(obj, top_addr);
-#else
   ldr(obj, Address(top_addr));
-#endif // AARCH64
 
   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
   add_rc(obj_end, obj, size_expression);
@@ -1301,13 +996,8 @@ void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1
   cmp(obj_end, heap_end);
   b(slow_case, hi);
 
-#ifdef AARCH64
-  stxr(heap_end/*scratched*/, obj_end, top_addr);
-  cbnz_w(heap_end, retry);
-#else
   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
   b(retry, ne);
-#endif // AARCH64
 }
 
 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
@@ -1329,50 +1019,14 @@ void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
   Label loop;
   const Register ptr = start;
 
-#ifdef AARCH64
-  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
-  const Register size = tmp;
-  Label remaining, done;
-
-  sub(size, end, start);
-
-#ifdef ASSERT
-  { Label L;
-    tst(size, wordSize - 1);
-    b(L, eq);
-    stop("size is not a multiple of wordSize");
-    bind(L);
-  }
-#endif // ASSERT
-
-  subs(size, size, wordSize);
-  b(remaining, le);
-
-  // Zero by 2 words per iteration.
-  bind(loop);
-  subs(size, size, 2*wordSize);
-  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
-  b(loop, gt);
-
-  bind(remaining);
-  b(done, ne);
-  str(ZR, Address(ptr));
-  bind(done);
-#else
   mov(tmp, 0);
   bind(loop);
   cmp(ptr, end);
   str(tmp, Address(ptr, wordSize, post_indexed), lo);
   b(loop, lo);
-#endif // AARCH64
 }
 
 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
-#ifdef AARCH64
-  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
-  add_rc(tmp, tmp, size_in_bytes);
-  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
-#else
   // Bump total bytes allocated by this thread
   Label done;
 
@@ -1410,7 +1064,6 @@ void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Regi
 
   // Unborrow the Rthread
   sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
-#endif // AARCH64
 }
 
 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
@@ -1420,16 +1073,9 @@ void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register
 
     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
     strb(R0, Address(tmp));
-#ifdef AARCH64
-    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
-      sub(tmp, tmp, page_size);
-      strb(R0, Address(tmp));
-    }
-#else
     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
       strb(R0, Address(tmp, -0xff0, pre_indexed));
     }
-#endif // AARCH64
   }
 }
 
@@ -1439,16 +1085,9 @@ void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
 
     mov(tmp, SP);
     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
-#ifdef AARCH64
-    sub(tmp, tmp, Rsize);
-    bind(loop);
-    subs(Rsize, Rsize, os::vm_page_size());
-    strb(ZR, Address(tmp, Rsize));
-#else
     bind(loop);
     subs(Rsize, Rsize, 0xff0);
     strb(R0, Address(tmp, -0xff0, pre_indexed));
-#endif // AARCH64
     b(loop, hi);
   }
 }
@@ -1471,24 +1110,10 @@ void MacroAssembler::stop(const char* msg) {
   ldr_literal(R0, Lmsg);                     // message
   mov(R1, SP);                               // register save area
 
-#ifdef AARCH64
-  ldr_literal(Rtemp, Ldebug);
-  br(Rtemp);
-#else
   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
-#endif // AARCH64
 
-#if defined(COMPILER2) && defined(AARCH64)
-  int off = offset();
-#endif
   bind_literal(Lmsg);
   bind_literal(Ldebug);
-#if defined(COMPILER2) && defined(AARCH64)
-  if (offset() - off == 2 * wordSize) {
-    // no padding, so insert nop for worst-case sizing
-    nop();
-  }
-#endif
 }
 
 void MacroAssembler::warn(const char* msg) {
@@ -1504,12 +1129,6 @@ void MacroAssembler::warn(const char* msg) {
 
   int push_size = save_caller_save_registers();
 
-#ifdef AARCH64
-  // TODO-AARCH64 - get rid of extra debug parameters
-  mov(R1, LR);
-  mov(R2, FP);
-  add(R3, SP, push_size);
-#endif
 
   ldr_literal(R0, Lmsg);                    // message
   ldr_literal(LR, Lwarn);                   // call warning
@@ -1528,42 +1147,16 @@ void MacroAssembler::warn(const char* msg) {
 int MacroAssembler::save_all_registers() {
   // This code pattern is matched in NativeIntruction::is_save_all_registers.
   // Update it at modifications.
-#ifdef AARCH64
-  const Register tmp = Rtemp;
-  raw_push(R30, ZR);
-  for (int i = 28; i >= 0; i -= 2) {
-      raw_push(as_Register(i), as_Register(i+1));
-  }
-  mov_pc_to(tmp);
-  str(tmp, Address(SP, 31*wordSize));
-  ldr(tmp, Address(SP, tmp->encoding()*wordSize));
-  return 32*wordSize;
-#else
   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
   return 15*wordSize;
-#endif // AARCH64
 }
 
 void MacroAssembler::restore_all_registers() {
-#ifdef AARCH64
-  for (int i = 0; i <= 28; i += 2) {
-    raw_pop(as_Register(i), as_Register(i+1));
-  }
-  raw_pop(R30, ZR);
-#else
   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
   add(SP, SP, wordSize);                         // discard saved PC
-#endif // AARCH64
 }
 
 int MacroAssembler::save_caller_save_registers() {
-#ifdef AARCH64
-  for (int i = 0; i <= 16; i += 2) {
-    raw_push(as_Register(i), as_Register(i+1));
-  }
-  raw_push(R18, LR);
-  return 20*wordSize;
-#else
 #if R9_IS_SCRATCHED
   // Save also R10 to preserve alignment
   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
@@ -1572,22 +1165,14 @@ int MacroAssembler::save_caller_save_registers() {
   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
   return 6*wordSize;
 #endif
-#endif // AARCH64
 }
 
 void MacroAssembler::restore_caller_save_registers() {
-#ifdef AARCH64
-  raw_pop(R18, LR);
-  for (int i = 16; i >= 0; i -= 2) {
-    raw_pop(as_Register(i), as_Register(i+1));
-  }
-#else
 #if R9_IS_SCRATCHED
   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
 #else
   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
 #endif
-#endif // AARCH64
 }
 
 void MacroAssembler::debug(const char* msg, const intx* registers) {
@@ -1601,23 +1186,14 @@ void MacroAssembler::debug(const char* msg, const intx* registers) {
       BytecodeCounter::print();
     }
     if (os::message_box(msg, "Execution stopped, print registers?")) {
-#ifdef AARCH64
-      // saved registers: R0-R30, PC
-      const int nregs = 32;
-#else
       // saved registers: R0-R12, LR, PC
       const int nregs = 15;
       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
-#endif // AARCH64
 
-      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
-        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
+      for (int i = 0; i < nregs; i++) {
+        tty->print_cr("%s = " INTPTR_FORMAT, regs[i]->name(), registers[i]);
       }
 
-#ifdef AARCH64
-      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
-#endif // AARCH64
-
       // derive original SP value from the address of register save area
       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
     }
@@ -1661,24 +1237,6 @@ FixedSizeCodeBlock::~FixedSizeCodeBlock() {
   }
 }
 
-#ifdef AARCH64
-
-// Serializes memory.
-// tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM
-void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
-  if (!os::is_MP()) return;
-
-  // TODO-AARCH64 investigate dsb vs dmb effects
-  if (order_constraint == StoreStore) {
-    dmb(DMB_st);
-  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
-    dmb(DMB_ld);
-  } else {
-    dmb(DMB_all);
-  }
-}
-
-#else
 
 // Serializes memory. Potentially blows flags and reg.
 // tmp is a scratch for v6 co-processor write op (could be noreg for other architecure versions)
@@ -1709,7 +1267,6 @@ void MacroAssembler::membar(Membar_mask_bits order_constraint,
   }
 }
 
-#endif // AARCH64
 
 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
 // on failure, so fall-through can only mean success.
@@ -1732,36 +1289,6 @@ void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
   // reordering we must issue a StoreStore or Release barrier before
   // the CAS store.
 
-#ifdef AARCH64
-
-  Register Rscratch = tmp;
-  Register Roop = base;
-  Register mark = oldval;
-  Register Rbox = newval;
-  Label loop;
-
-  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
-
-  // Instead of StoreStore here, we use store-release-exclusive below
-
-  bind(loop);
-
-  ldaxr(tmp, base);  // acquire
-  cmp(tmp, oldval);
-  b(slow_case, ne);
-  stlxr(tmp, newval, base); // release
-  if (one_shot) {
-    cmp_w(tmp, 0);
-  } else {
-    cbnz_w(tmp, loop);
-    fallthrough_is_success = true;
-  }
-
-  // MemBarAcquireLock would normally go here, but
-  // we already do ldaxr+stlxr above, which has
-  // Sequential Consistency
-
-#else
   membar(MacroAssembler::StoreStore, noreg);
 
   if (one_shot) {
@@ -1779,7 +1306,6 @@ void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
   // the load and store in the CAS sequence, so play it safe and
   // do a full fence.
   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
-#endif
   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
     b(slow_case, ne);
   }
@@ -1794,24 +1320,6 @@ void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
 
   assert_different_registers(oldval,newval,base,tmp);
 
-#ifdef AARCH64
-  Label loop;
-
-  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
-
-  bind(loop);
-  ldxr(tmp, base);
-  cmp(tmp, oldval);
-  b(slow_case, ne);
-  // MemBarReleaseLock barrier
-  stlxr(tmp, newval, base);
-  if (one_shot) {
-    cmp_w(tmp, 0);
-  } else {
-    cbnz_w(tmp, loop);
-    fallthrough_is_success = true;
-  }
-#else
   // MemBarReleaseLock barrier
   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
   // but that doesn't prevent a load or store from floating down between
@@ -1827,7 +1335,6 @@ void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
   } else {
     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
   }
-#endif
   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
     b(slow_case, ne);
   }
@@ -1852,21 +1359,6 @@ void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
       b(done, inverse(cond));
     }
 
-#ifdef AARCH64
-    raw_push(R0, R1);
-    raw_push(R2, ZR);
-
-    ldr_literal(R0, counter_addr_literal);
-
-    bind(retry);
-    ldxr_w(R1, R0);
-    add_w(R1, R1, 1);
-    stxr_w(R2, R1, R0);
-    cbnz_w(R2, retry);
-
-    raw_pop(R2, ZR);
-    raw_pop(R0, R1);
-#else
     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
     ldr_literal(R0, counter_addr_literal);
 
@@ -1881,7 +1373,6 @@ void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
     msr(CPSR_fsxc, Rtemp);
 
     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
-#endif // AARCH64
 
     b(done);
     bind_literal(counter_addr_literal);
@@ -1967,11 +1458,7 @@ int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Re
   orr(tmp_reg, tmp_reg, Rthread);
   eor(tmp_reg, tmp_reg, swap_reg);
 
-#ifdef AARCH64
-  ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
-#else
   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
-#endif // AARCH64
 
 #ifndef PRODUCT
   if (counters != NULL) {
@@ -2021,19 +1508,12 @@ int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Re
   // Note that we know the owner is not ourself. Hence, success can
   // only happen when the owner bits is 0
 
-#ifdef AARCH64
-  // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
-  // cleared bit in the middle (cms bit). So it is loaded with separate instruction.
-  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
-  andr(swap_reg, swap_reg, tmp2);
-#else
   // until the assembler can be made smarter, we need to make some assumptions about the values
   // so we can optimize this:
   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
 
   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
-#endif // AARCH64
 
   orr(tmp_reg, swap_reg, Rthread); // new mark
 
@@ -2061,13 +1541,8 @@ int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Re
   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
 
   // owner bits 'random'. Set them to Rthread.
-#ifdef AARCH64
-  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
-  andr(tmp_reg, tmp_reg, tmp2);
-#else
   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
-#endif // AARCH64
 
   orr(tmp_reg, tmp_reg, Rthread); // new mark
 
@@ -2096,13 +1571,8 @@ int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Re
   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
 
   // owner bits 'random'. Clear them
-#ifdef AARCH64
-  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
-  andr(tmp_reg, tmp_reg, tmp2);
-#else
   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
-#endif // AARCH64
 
   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
@@ -2158,29 +1628,6 @@ void MacroAssembler::resolve_jobject(Register value,
 
 //////////////////////////////////////////////////////////////////////////////////
 
-#ifdef AARCH64
-
-void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
-  switch (size_in_bytes) {
-    case  8: ldr(dst, src); break;
-    case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
-    case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
-    case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
-    default: ShouldNotReachHere();
-  }
-}
-
-void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
-  switch (size_in_bytes) {
-    case  8: str(src, dst);    break;
-    case  4: str_32(src, dst); break;
-    case  2: strh(src, dst);   break;
-    case  1: strb(src, dst);   break;
-    default: ShouldNotReachHere();
-  }
-}
-
-#else
 
 void MacroAssembler::load_sized_value(Register dst, Address src,
                                     size_t size_in_bytes, bool is_signed, AsmCondition cond) {
@@ -2201,7 +1648,6 @@ void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in
     default: ShouldNotReachHere();
   }
 }
-#endif // AARCH64
 
 // Look up the method for a megamorphic invokeinterface call.
 // The target method is determined by <Rinterf, Rindex>.
@@ -2234,24 +1680,12 @@ void MacroAssembler::lookup_interface_method(Register Rklass,
   Label loop;
   bind(loop);
   ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
-#ifdef AARCH64
-  Label found;
-  cmp(Rtmp, Rintf);
-  b(found, eq);
-  cbnz(Rtmp, loop);
-#else
   cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
   cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
   b(loop, ne);
-#endif // AARCH64
 
-#ifdef AARCH64
-  b(L_no_such_interface);
-  bind(found);
-#else
   // CF == 0 means we reached the end of itable without finding icklass
   b(L_no_such_interface, cc);
-#endif // !AARCH64
 
   if (method_result != noreg) {
     // Interface found at previous position of Rscan, now load the method
@@ -2325,31 +1759,20 @@ void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Registe
 }
 
 void MacroAssembler::floating_cmp(Register dst) {
-#ifdef AARCH64
-  NOT_TESTED();
-  cset(dst, gt);            // 1 if '>', else 0
-  csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
-#else
   vmrs(dst, FPSCR);
   orr(dst, dst, 0x08000000);
   eor(dst, dst, AsmOperand(dst, lsl, 3));
   mov(dst, AsmOperand(dst, asr, 30));
-#endif
 }
 
 void MacroAssembler::restore_default_fp_mode() {
-#ifdef AARCH64
-  msr(SysReg_FPCR, ZR);
-#else
 #ifndef __SOFTFP__
   // Round to Near mode, IEEE compatible, masked exceptions
   mov(Rtemp, 0);
   vmsr(FPSCR, Rtemp);
 #endif // !__SOFTFP__
-#endif // AARCH64
 }
 
-#ifndef AARCH64
 // 24-bit word range == 26-bit byte range
 bool check26(int offset) {
   // this could be simplified, but it mimics encoding and decoding
@@ -2359,7 +1782,6 @@ bool check26(int offset) {
   int decoded = encoded << 8 >> 6;
   return offset == decoded;
 }
-#endif // !AARCH64
 
 // Perform some slight adjustments so the default 32MB code cache
 // is fully reachable.
@@ -2370,18 +1792,6 @@ static inline address last_cache_address() {
   return CodeCache::high_bound() - Assembler::InstructionSize;
 }
 
-#ifdef AARCH64
-// Can we reach target using ADRP?
-bool MacroAssembler::page_reachable_from_cache(address target) {
-  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
-  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
-  intptr_t addr = (intptr_t)target & ~0xfff;
-
-  intptr_t loffset = addr - cl;
-  intptr_t hoffset = addr - ch;
-  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
-}
-#endif
 
 // Can we reach target using unconditional branch or call from anywhere
 // in the code cache (because code can be relocated)?
@@ -2406,11 +1816,7 @@ bool MacroAssembler::_reachable_from_cache(address target) {
   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
 
-#ifdef AARCH64
-  return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
-#else
   return check26(loffset - 8) && check26(hoffset - 8);
-#endif
 }
 
 bool MacroAssembler::reachable_from_cache(address target) {
@@ -2430,11 +1836,11 @@ bool MacroAssembler::cache_fully_reachable() {
   return _cache_fully_reachable();
 }
 
-void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
+void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) {
   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
   if (reachable_from_cache(target)) {
     relocate(rtype);
-    b(target NOT_AARCH64_ARG(cond));
+    b(target, cond);
     return;
   }
 
@@ -2444,20 +1850,6 @@ void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register s
     rtype = relocInfo::none;
   }
 
-#ifdef AARCH64
-  assert (scratch != noreg, "should be specified");
-  InlinedAddress address_literal(target, rtype);
-  ldr_literal(scratch, address_literal);
-  br(scratch);
-  int off = offset();
-  bind_literal(address_literal);
-#ifdef COMPILER2
-  if (offset() - off == wordSize) {
-    // no padding, so insert nop for worst-case sizing
-    nop();
-  }
-#endif
-#else
   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
     // Note: this version cannot be (atomically) patched
     mov_slow(scratch, (intptr_t)target, cond);
@@ -2473,20 +1865,19 @@ void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register s
     bind_literal(address_literal);
     bind(skip);
   }
-#endif // AARCH64
 }
 
 // Similar to jump except that:
 // - near calls are valid only if any destination in the cache is near
 // - no movt/movw (not atomically patchable)
-void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
+void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) {
   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
   if (cache_fully_reachable()) {
     // Note: this assumes that all possible targets (the initial one
     // and the addressed patched to) are all in the code cache.
     assert(CodeCache::contains(target), "target might be too far");
     relocate(rtype);
-    b(target NOT_AARCH64_ARG(cond));
+    b(target, cond);
     return;
   }
 
@@ -2496,21 +1887,6 @@ void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype,
     rtype = relocInfo::none;
   }
 
-#ifdef AARCH64
-  assert (scratch != noreg, "should be specified");
-  InlinedAddress address_literal(target);
-  relocate(rtype);
-  ldr_literal(scratch, address_literal);
-  br(scratch);
-  int off = offset();
-  bind_literal(address_literal);
-#ifdef COMPILER2
-  if (offset() - off == wordSize) {
-    // no padding, so insert nop for worst-case sizing
-    nop();
-  }
-#endif
-#else
   {
     Label skip;
     InlinedAddress address_literal(target);
@@ -2522,15 +1898,14 @@ void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype,
     bind_literal(address_literal);
     bind(skip);
   }
-#endif // AARCH64
 }
 
-void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
+void MacroAssembler::call(address target, RelocationHolder rspec, AsmCondition cond) {
   Register scratch = LR;
   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
   if (reachable_from_cache(target)) {
     relocate(rspec);
-    bl(target NOT_AARCH64_ARG(cond));
+    bl(target, cond);
     return;
   }
 
@@ -2541,31 +1916,20 @@ void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG
     rspec = RelocationHolder::none;
   }
 
-#ifndef AARCH64
   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
     // Note: this version cannot be (atomically) patched
     mov_slow(scratch, (intptr_t)target, cond);
     blx(scratch, cond);
     return;
   }
-#endif
 
   {
     Label ret_addr;
-#ifndef AARCH64
     if (cond != al) {
       b(ret_addr, inverse(cond));
     }
-#endif
 
 
-#ifdef AARCH64
-    // TODO-AARCH64: make more optimal implementation
-    // [ Keep in sync with MacroAssembler::call_size ]
-    assert(rspec.type() == relocInfo::none, "call reloc not implemented");
-    mov_slow(scratch, target);
-    blr(scratch);
-#else
     InlinedAddress address_literal(target);
     relocate(rspec);
     adr(LR, ret_addr);
@@ -2573,18 +1937,9 @@ void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG
 
     bind_literal(address_literal);
     bind(ret_addr);
-#endif
   }
 }
 
-#if defined(AARCH64) && defined(COMPILER2)
-int MacroAssembler::call_size(address target, bool far, bool patchable) {
-  // FIXME: mov_slow is variable-length
-  if (!far) return 1; // bl
-  if (patchable) return 2;  // ldr; blr
-  return instr_count_for_mov_slow((intptr_t)target) + 1;
-}
-#endif
 
 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
   assert(rspec.type() == relocInfo::static_call_type ||
@@ -2599,38 +1954,10 @@ int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec
     assert(CodeCache::contains(target), "target might be too far");
     bl(target);
   } else {
-#if defined(AARCH64) && defined(COMPILER2)
-    if (c2) {
-      // return address needs to match call_size().
-      // no need to trash Rtemp
-      int off = offset();
-      Label skip_literal;
-      InlinedAddress address_literal(target);
-      ldr_literal(LR, address_literal);
-      blr(LR);
-      int ret_addr_offset = offset();
-      assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
-      b(skip_literal);
-      int off2 = offset();
-      bind_literal(address_literal);
-      if (offset() - off2 == wordSize) {
-        // no padding, so insert nop for worst-case sizing
-        nop();
-      }
-      bind(skip_literal);
-      return ret_addr_offset;
-    }
-#endif
     Label ret_addr;
     InlinedAddress address_literal(target);
-#ifdef AARCH64
-    ldr_literal(Rtemp, address_literal);
-    adr(LR, ret_addr);
-    br(Rtemp);
-#else
     adr(LR, ret_addr);
     ldr_literal(PC, address_literal);
-#endif
     bind_literal(address_literal);
     bind(ret_addr);
   }
@@ -2657,47 +1984,17 @@ void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp)
 
 // Compressed pointers
 
-#ifdef AARCH64
-
-void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
-  if (UseCompressedClassPointers) {
-    ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
-    decode_klass_not_null(dst_klass);
-  } else {
-    ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
-  }
-}
-
-#else
 
 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
 }
 
-#endif // AARCH64
 
 // Blows src_klass.
 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
-#ifdef AARCH64
-  if (UseCompressedClassPointers) {
-    assert(src_klass != dst_oop, "not enough registers");
-    encode_klass_not_null(src_klass);
-    str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
-    return;
-  }
-#endif // AARCH64
   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
 }
 
-#ifdef AARCH64
-
-void MacroAssembler::store_klass_gap(Register dst) {
-  if (UseCompressedClassPointers) {
-    str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
-  }
-}
-
-#endif // AARCH64
 
 
 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
@@ -2737,264 +2034,6 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
   }
 }
 
-
-#ifdef AARCH64
-
-// Algorithm must match oop.inline.hpp encode_heap_oop.
-void MacroAssembler::encode_heap_oop(Register dst, Register src) {
-  // This code pattern is matched in NativeIntruction::skip_encode_heap_oop.
-  // Update it at modifications.
-  assert (UseCompressedOops, "must be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
-#endif
-  verify_oop(src);
-  if (Universe::narrow_oop_base() == NULL) {
-    if (Universe::narrow_oop_shift() != 0) {
-      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-      _lsr(dst, src, Universe::narrow_oop_shift());
-    } else if (dst != src) {
-      mov(dst, src);
-    }
-  } else {
-    tst(src, src);
-    csel(dst, Rheap_base, src, eq);
-    sub(dst, dst, Rheap_base);
-    if (Universe::narrow_oop_shift() != 0) {
-      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-      _lsr(dst, dst, Universe::narrow_oop_shift());
-    }
-  }
-}
-
-// Same algorithm as oop.inline.hpp decode_heap_oop.
-void MacroAssembler::decode_heap_oop(Register dst, Register src) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
-#endif
-  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  if (Universe::narrow_oop_base() != NULL) {
-    tst(src, src);
-    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
-    csel(dst, dst, ZR, ne);
-  } else {
-    _lsl(dst, src, Universe::narrow_oop_shift());
-  }
-  verify_oop(dst);
-}
-
-#ifdef COMPILER2
-// Algorithm must match oop.inline.hpp encode_heap_oop.
-// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
-// must be changed.
-void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
-  assert (UseCompressedOops, "must be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
-#endif
-  verify_oop(src);
-  if (Universe::narrow_oop_base() == NULL) {
-    if (Universe::narrow_oop_shift() != 0) {
-      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-      _lsr(dst, src, Universe::narrow_oop_shift());
-    } else if (dst != src) {
-          mov(dst, src);
-    }
-  } else {
-    sub(dst, src, Rheap_base);
-    if (Universe::narrow_oop_shift() != 0) {
-      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-      _lsr(dst, dst, Universe::narrow_oop_shift());
-    }
-  }
-}
-
-// Same algorithm as oops.inline.hpp decode_heap_oop.
-// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
-// must be changed.
-void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
-#endif
-  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  if (Universe::narrow_oop_base() != NULL) {
-    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
-  } else {
-    _lsl(dst, src, Universe::narrow_oop_shift());
-  }
-  verify_oop(dst);
-}
-
-void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
-  assert(UseCompressedClassPointers, "should only be used for compressed header");
-  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int klass_index = oop_recorder()->find_index(k);
-  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-
-  // Relocation with special format (see relocInfo_arm.hpp).
-  relocate(rspec);
-  narrowKlass encoded_k = Klass::encode_klass(k);
-  movz(dst, encoded_k & 0xffff, 0);
-  movk(dst, (encoded_k >> 16) & 0xffff, 16);
-}
-
-void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
-  assert(UseCompressedOops, "should only be used for compressed header");
-  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int oop_index = oop_recorder()->find_index(obj);
-  RelocationHolder rspec = oop_Relocation::spec(oop_index);
-
-  relocate(rspec);
-  movz(dst, 0xffff, 0);
-  movk(dst, 0xffff, 16);
-}
-
-#endif // COMPILER2
-// Must preserve condition codes, or C2 encodeKlass_not_null rule
-// must be changed.
-void MacroAssembler::encode_klass_not_null(Register r) {
-  if (Universe::narrow_klass_base() != NULL) {
-    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
-    assert(r != Rheap_base, "Encoding a klass in Rheap_base");
-    mov_slow(Rheap_base, Universe::narrow_klass_base());
-    sub(r, r, Rheap_base);
-  }
-  if (Universe::narrow_klass_shift() != 0) {
-    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    _lsr(r, r, Universe::narrow_klass_shift());
-  }
-  if (Universe::narrow_klass_base() != NULL) {
-    reinit_heapbase();
-  }
-}
-
-// Must preserve condition codes, or C2 encodeKlass_not_null rule
-// must be changed.
-void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
-  if (dst == src) {
-    encode_klass_not_null(src);
-    return;
-  }
-  if (Universe::narrow_klass_base() != NULL) {
-    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
-    sub(dst, src, dst);
-    if (Universe::narrow_klass_shift() != 0) {
-      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-      _lsr(dst, dst, Universe::narrow_klass_shift());
-    }
-  } else {
-    if (Universe::narrow_klass_shift() != 0) {
-      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-      _lsr(dst, src, Universe::narrow_klass_shift());
-    } else {
-      mov(dst, src);
-    }
-  }
-}
-
-// Function instr_count_for_decode_klass_not_null() counts the instructions
-// generated by decode_klass_not_null(register r) and reinit_heapbase(),
-// when (Universe::heap() != NULL).  Hence, if the instructions they
-// generate change, then this method needs to be updated.
-int MacroAssembler::instr_count_for_decode_klass_not_null() {
-  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
-  assert(Universe::heap() != NULL, "java heap should be initialized");
-  if (Universe::narrow_klass_base() != NULL) {
-    return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
-      1 +                                                                 // add
-      instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
-  } else {
-    if (Universe::narrow_klass_shift() != 0) {
-      return 1;
-    }
-  }
-  return 0;
-}
-
-// Must preserve condition codes, or C2 decodeKlass_not_null rule
-// must be changed.
-void MacroAssembler::decode_klass_not_null(Register r) {
-  int off = offset();
-  assert(UseCompressedClassPointers, "should only be used for compressed headers");
-  assert(Universe::heap() != NULL, "java heap should be initialized");
-  assert(r != Rheap_base, "Decoding a klass in Rheap_base");
-  // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_klass_base() != NULL) {
-    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
-    mov_slow(Rheap_base, Universe::narrow_klass_base());
-    add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
-    reinit_heapbase();
-  } else {
-    if (Universe::narrow_klass_shift() != 0) {
-      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-      _lsl(r, r, Universe::narrow_klass_shift());
-    }
-  }
-  assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
-}
-
-// Must preserve condition codes, or C2 decodeKlass_not_null rule
-// must be changed.
-void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
-  if (src == dst) {
-    decode_klass_not_null(src);
-    return;
-  }
-
-  assert(UseCompressedClassPointers, "should only be used for compressed headers");
-  assert(Universe::heap() != NULL, "java heap should be initialized");
-  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
-  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_klass_base() != NULL) {
-    mov_slow(dst, Universe::narrow_klass_base());
-    add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
-  } else {
-    _lsl(dst, src, Universe::narrow_klass_shift());
-  }
-}
-
-
-void MacroAssembler::reinit_heapbase() {
-  if (UseCompressedOops || UseCompressedClassPointers) {
-    if (Universe::heap() != NULL) {
-      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
-    } else {
-      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
-    }
-  }
-}
-
-#ifdef ASSERT
-void MacroAssembler::verify_heapbase(const char* msg) {
-  // This code pattern is matched in NativeIntruction::skip_verify_heapbase.
-  // Update it at modifications.
-  assert (UseCompressedOops, "should be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  if (CheckCompressedOops) {
-    Label ok;
-    str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
-    raw_push(Rtemp, ZR);
-    mrs(Rtemp, Assembler::SysReg_NZCV);
-    str(Rtemp, Address(SP, 1 * wordSize));
-    mov_slow(Rtemp, Universe::narrow_ptrs_base());
-    cmp(Rheap_base, Rtemp);
-    b(ok, eq);
-    stop(msg);
-    bind(ok);
-    ldr(Rtemp, Address(SP, 1 * wordSize));
-    msr(Assembler::SysReg_NZCV, Rtemp);
-    raw_pop(Rtemp, ZR);
-    str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
-  }
-}
-#endif // ASSERT
-
-#endif // AARCH64
-
 #ifdef COMPILER2
 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3)
 {
@@ -3024,17 +2063,6 @@ void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch,
   // Check for recursive lock
   // See comments in InterpreterMacroAssembler::lock_object for
   // explanations on the fast recursive locking check.
-#ifdef AARCH64
-  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
-  Assembler::LogicalImmediate imm(mask, false);
-  mov(Rscratch, SP);
-  sub(Rscratch, Rmark, Rscratch);
-  ands(Rscratch, Rscratch, imm);
-  // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
-  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
-  b(done);
-
-#else
   // -1- test low 2 bits
   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
   // -2- test (hdr - SP) if the low two bits are 0
@@ -3044,7 +2072,6 @@ void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch,
   // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
   b(done);
-#endif
 
   bind(fast_lock);
   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
@@ -3060,7 +2087,7 @@ void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch,
   //  NE -> Failure, branch to slow path
 }
 
-void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2  AARCH64_ONLY_ARG(Register Rscratch3))
+void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2)
 {
   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
 
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.hpp b/src/hotspot/cpu/arm/macroAssembler_arm.hpp
index 9337f958960..921d6a18a7e 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp
@@ -229,10 +229,6 @@ class MacroAssembler: public Assembler {
   // this was subsequently modified to its present name and return type
   virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset);
 
-#ifdef AARCH64
-# define NOT_IMPLEMENTED() unimplemented("NYI at " __FILE__ ":" XSTR(__LINE__))
-# define NOT_TESTED()      warn("Not tested at " __FILE__ ":" XSTR(__LINE__))
-#endif
 
   void align(int modulus);
 
@@ -275,7 +271,7 @@ class MacroAssembler: public Assembler {
 
   // Always sets/resets sp, which default to SP if (last_sp == noreg)
   // Optionally sets/resets fp (use noreg to avoid setting it)
-  // Always sets/resets pc on AArch64; optionally sets/resets pc on 32-bit ARM depending on save_last_java_pc flag
+  // Optionally sets/resets pc depending on save_last_java_pc flag
   // Note: when saving PC, set_last_Java_frame returns PC's offset in the code section
   //       (for oop_maps offset computation)
   int set_last_Java_frame(Register last_sp, Register last_fp, bool save_last_java_pc, Register tmp);
@@ -401,7 +397,6 @@ class MacroAssembler: public Assembler {
 
   void resolve_jobject(Register value, Register tmp1, Register tmp2);
 
-#ifndef AARCH64
   void nop() {
     mov(R0, R0);
   }
@@ -441,7 +436,6 @@ class MacroAssembler: public Assembler {
   void fpops(FloatRegister fd, AsmCondition cond = al) {
     fldmias(SP, FloatRegisterSet(fd), writeback, cond);
   }
-#endif // !AARCH64
 
   void fpush(FloatRegisterSet reg_set) {
     fstmdbd(SP, reg_set, writeback);
@@ -471,15 +465,10 @@ class MacroAssembler: public Assembler {
     LoadLoad   = 1 << 0
   };
 
-#ifdef AARCH64
-  // tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM
-  void membar(Membar_mask_bits order_constraint, Register tmp = noreg);
-#else
   void membar(Membar_mask_bits mask,
               Register tmp,
               bool preserve_flags = true,
               Register load_tgt = noreg);
-#endif
 
   void breakpoint(AsmCondition cond = al);
   void stop(const char* msg);
@@ -511,47 +500,28 @@ class MacroAssembler: public Assembler {
   void add_slow(Register rd, Register rn, int c);
   void sub_slow(Register rd, Register rn, int c);
 
-#ifdef AARCH64
-  static int mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm /* optional */);
-#endif
 
-  void mov_slow(Register rd, intptr_t c NOT_AARCH64_ARG(AsmCondition cond = al));
+  void mov_slow(Register rd, intptr_t c, AsmCondition cond = al);
   void mov_slow(Register rd, const char *string);
   void mov_slow(Register rd, address addr);
 
   void patchable_mov_oop(Register rd, jobject o, int oop_index) {
-    mov_oop(rd, o, oop_index AARCH64_ONLY_ARG(true));
+    mov_oop(rd, o, oop_index);
   }
-  void mov_oop(Register rd, jobject o, int index = 0
-               AARCH64_ONLY_ARG(bool patchable = false)
-               NOT_AARCH64_ARG(AsmCondition cond = al));
-
+  void mov_oop(Register rd, jobject o, int index = 0, AsmCondition cond = al);
 
   void patchable_mov_metadata(Register rd, Metadata* o, int index) {
-    mov_metadata(rd, o, index AARCH64_ONLY_ARG(true));
+    mov_metadata(rd, o, index);
   }
-  void mov_metadata(Register rd, Metadata* o, int index = 0 AARCH64_ONLY_ARG(bool patchable = false));
+  void mov_metadata(Register rd, Metadata* o, int index = 0);
 
-  void mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond = al));
-  void mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond = al));
+  void mov_float(FloatRegister fd, jfloat c, AsmCondition cond = al);
+  void mov_double(FloatRegister fd, jdouble c, AsmCondition cond = al);
 
-#ifdef AARCH64
-  int mov_pc_to(Register rd) {
-    Label L;
-    adr(rd, L);
-    bind(L);
-    return offset();
-  }
-#endif
 
   // Note: this variant of mov_address assumes the address moves with
   // the code. Do *not* implement it with non-relocated instructions,
   // unless PC-relative.
-#ifdef AARCH64
-  void mov_relative_address(Register rd, address addr) {
-    adr(rd, addr);
-  }
-#else
   void mov_relative_address(Register rd, address addr, AsmCondition cond = al) {
     int offset = addr - pc() - 8;
     assert((offset & 3) == 0, "bad alignment");
@@ -563,7 +533,6 @@ class MacroAssembler: public Assembler {
       sub(rd, PC, -offset, cond);
     }
   }
-#endif // AARCH64
 
   // Runtime address that may vary from one execution to another. The
   // symbolic_reference describes what the address is, allowing
@@ -584,7 +553,6 @@ class MacroAssembler: public Assembler {
       mov_slow(rd, (intptr_t)addr);
       return;
     }
-#ifndef AARCH64
     if (VM_Version::supports_movw()) {
       relocate(rspec);
       int c = (int)addr;
@@ -594,15 +562,11 @@ class MacroAssembler: public Assembler {
       }
       return;
     }
-#endif
     Label skip_literal;
     InlinedAddress addr_literal(addr, rspec);
     ldr_literal(rd, addr_literal);
     b(skip_literal);
     bind_literal(addr_literal);
-    // AARCH64 WARNING: because of alignment padding, extra padding
-    // may be required to get a consistent size for C2, or rules must
-    // overestimate size see MachEpilogNode::size
     bind(skip_literal);
   }
 
@@ -616,45 +580,28 @@ class MacroAssembler: public Assembler {
     assert(L.rspec().type() != relocInfo::runtime_call_type, "avoid ldr_literal for calls");
     assert(L.rspec().type() != relocInfo::static_call_type, "avoid ldr_literal for calls");
     relocate(L.rspec());
-#ifdef AARCH64
-    ldr(rd, target(L.label));
-#else
     ldr(rd, Address(PC, target(L.label) - pc() - 8));
-#endif
   }
 
   void ldr_literal(Register rd, InlinedString& L) {
     const char* msg = L.msg();
     if (code()->consts()->contains((address)msg)) {
       // string address moves with the code
-#ifdef AARCH64
-      ldr(rd, (address)msg);
-#else
       ldr(rd, Address(PC, ((address)msg) - pc() - 8));
-#endif
       return;
     }
     // Warning: use external strings with care. They are not relocated
     // if the code moves. If needed, use code_string to move them
     // to the consts section.
-#ifdef AARCH64
-    ldr(rd, target(L.label));
-#else
     ldr(rd, Address(PC, target(L.label) - pc() - 8));
-#endif
   }
 
   void ldr_literal(Register rd, InlinedMetadata& L) {
     // relocation done in the bind_literal for metadatas
-#ifdef AARCH64
-    ldr(rd, target(L.label));
-#else
     ldr(rd, Address(PC, target(L.label) - pc() - 8));
-#endif
   }
 
   void bind_literal(InlinedAddress& L) {
-    AARCH64_ONLY(align(wordSize));
     bind(L.label);
     assert(L.rspec().type() != relocInfo::metadata_type, "Must use InlinedMetadata");
     // We currently do not use oop 'bound' literals.
@@ -672,13 +619,11 @@ class MacroAssembler: public Assembler {
       // to detect errors.
       return;
     }
-    AARCH64_ONLY(align(wordSize));
     bind(L.label);
     AbstractAssembler::emit_address((address)L.msg());
   }
 
   void bind_literal(InlinedMetadata& L) {
-    AARCH64_ONLY(align(wordSize));
     bind(L.label);
     relocate(metadata_Relocation::spec_for_immediate());
     AbstractAssembler::emit_address((address)L.data());
@@ -687,138 +632,106 @@ class MacroAssembler: public Assembler {
   void resolve_oop_handle(Register result);
   void load_mirror(Register mirror, Register method, Register tmp);
 
-  // Porting layer between 32-bit ARM and AArch64
-
-#define COMMON_INSTR_1(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg_type) \
+#define ARM_INSTR_1(common_mnemonic, arm32_mnemonic, arg_type) \
   void common_mnemonic(arg_type arg) { \
-      AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg); \
+      arm32_mnemonic(arg); \
   }
 
-#define COMMON_INSTR_2(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg1_type, arg2_type) \
+#define ARM_INSTR_2(common_mnemonic, arm32_mnemonic, arg1_type, arg2_type) \
   void common_mnemonic(arg1_type arg1, arg2_type arg2) { \
-      AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg1, arg2); \
+      arm32_mnemonic(arg1, arg2); \
   }
 
-#define COMMON_INSTR_3(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg1_type, arg2_type, arg3_type) \
+#define ARM_INSTR_3(common_mnemonic, arm32_mnemonic, arg1_type, arg2_type, arg3_type) \
   void common_mnemonic(arg1_type arg1, arg2_type arg2, arg3_type arg3) { \
-      AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg1, arg2, arg3); \
+      arm32_mnemonic(arg1, arg2, arg3); \
   }
 
-  COMMON_INSTR_1(jump, br,  bx,  Register)
-  COMMON_INSTR_1(call, blr, blx, Register)
-
-  COMMON_INSTR_2(cbz_32,  cbz_w,  cbz,  Register, Label&)
-  COMMON_INSTR_2(cbnz_32, cbnz_w, cbnz, Register, Label&)
-
-  COMMON_INSTR_2(ldr_u32, ldr_w,  ldr,  Register, Address)
-  COMMON_INSTR_2(ldr_s32, ldrsw,  ldr,  Register, Address)
-  COMMON_INSTR_2(str_32,  str_w,  str,  Register, Address)
+  ARM_INSTR_1(jump, bx,  Register)
+  ARM_INSTR_1(call, blx, Register)
 
-  COMMON_INSTR_2(mvn_32,  mvn_w,  mvn,  Register, Register)
-  COMMON_INSTR_2(cmp_32,  cmp_w,  cmp,  Register, Register)
-  COMMON_INSTR_2(neg_32,  neg_w,  neg,  Register, Register)
-  COMMON_INSTR_2(clz_32,  clz_w,  clz,  Register, Register)
-  COMMON_INSTR_2(rbit_32, rbit_w, rbit, Register, Register)
+  ARM_INSTR_2(cbz_32,  cbz,  Register, Label&)
+  ARM_INSTR_2(cbnz_32, cbnz, Register, Label&)
 
-  COMMON_INSTR_2(cmp_32,  cmp_w,  cmp,  Register, int)
-  COMMON_INSTR_2(cmn_32,  cmn_w,  cmn,  Register, int)
+  ARM_INSTR_2(ldr_u32, ldr,  Register, Address)
+  ARM_INSTR_2(ldr_s32, ldr,  Register, Address)
+  ARM_INSTR_2(str_32,  str,  Register, Address)
 
-  COMMON_INSTR_3(add_32,  add_w,  add,  Register, Register, Register)
-  COMMON_INSTR_3(sub_32,  sub_w,  sub,  Register, Register, Register)
-  COMMON_INSTR_3(subs_32, subs_w, subs, Register, Register, Register)
-  COMMON_INSTR_3(mul_32,  mul_w,  mul,  Register, Register, Register)
-  COMMON_INSTR_3(and_32,  andr_w, andr, Register, Register, Register)
-  COMMON_INSTR_3(orr_32,  orr_w,  orr,  Register, Register, Register)
-  COMMON_INSTR_3(eor_32,  eor_w,  eor,  Register, Register, Register)
+  ARM_INSTR_2(mvn_32,  mvn,  Register, Register)
+  ARM_INSTR_2(cmp_32,  cmp,  Register, Register)
+  ARM_INSTR_2(neg_32,  neg,  Register, Register)
+  ARM_INSTR_2(clz_32,  clz,  Register, Register)
+  ARM_INSTR_2(rbit_32, rbit, Register, Register)
 
-  COMMON_INSTR_3(add_32,  add_w,  add,  Register, Register, AsmOperand)
-  COMMON_INSTR_3(sub_32,  sub_w,  sub,  Register, Register, AsmOperand)
-  COMMON_INSTR_3(orr_32,  orr_w,  orr,  Register, Register, AsmOperand)
-  COMMON_INSTR_3(eor_32,  eor_w,  eor,  Register, Register, AsmOperand)
-  COMMON_INSTR_3(and_32,  andr_w, andr, Register, Register, AsmOperand)
+  ARM_INSTR_2(cmp_32,  cmp,  Register, int)
+  ARM_INSTR_2(cmn_32,  cmn,  Register, int)
 
+  ARM_INSTR_3(add_32,  add,  Register, Register, Register)
+  ARM_INSTR_3(sub_32,  sub,  Register, Register, Register)
+  ARM_INSTR_3(subs_32, subs, Register, Register, Register)
+  ARM_INSTR_3(mul_32,  mul,  Register, Register, Register)
+  ARM_INSTR_3(and_32,  andr, Register, Register, Register)
+  ARM_INSTR_3(orr_32,  orr,  Register, Register, Register)
+  ARM_INSTR_3(eor_32,  eor,  Register, Register, Register)
 
-  COMMON_INSTR_3(add_32,  add_w,  add,  Register, Register, int)
-  COMMON_INSTR_3(adds_32, adds_w, adds, Register, Register, int)
-  COMMON_INSTR_3(sub_32,  sub_w,  sub,  Register, Register, int)
-  COMMON_INSTR_3(subs_32, subs_w, subs, Register, Register, int)
+  ARM_INSTR_3(add_32,  add,  Register, Register, AsmOperand)
+  ARM_INSTR_3(sub_32,  sub,  Register, Register, AsmOperand)
+  ARM_INSTR_3(orr_32,  orr,  Register, Register, AsmOperand)
+  ARM_INSTR_3(eor_32,  eor,  Register, Register, AsmOperand)
+  ARM_INSTR_3(and_32,  andr, Register, Register, AsmOperand)
 
-  COMMON_INSTR_2(tst_32,  tst_w,  tst,  Register, unsigned int)
-  COMMON_INSTR_2(tst_32,  tst_w,  tst,  Register, AsmOperand)
 
-  COMMON_INSTR_3(and_32,  andr_w, andr, Register, Register, uint)
-  COMMON_INSTR_3(orr_32,  orr_w,  orr,  Register, Register, uint)
-  COMMON_INSTR_3(eor_32,  eor_w,  eor,  Register, Register, uint)
+  ARM_INSTR_3(add_32,  add,  Register, Register, int)
+  ARM_INSTR_3(adds_32, adds, Register, Register, int)
+  ARM_INSTR_3(sub_32,  sub,  Register, Register, int)
+  ARM_INSTR_3(subs_32, subs, Register, Register, int)
 
-  COMMON_INSTR_1(cmp_zero_float,  fcmp0_s, fcmpzs, FloatRegister)
-  COMMON_INSTR_1(cmp_zero_double, fcmp0_d, fcmpzd, FloatRegister)
+  ARM_INSTR_2(tst_32,  tst,  Register, unsigned int)
+  ARM_INSTR_2(tst_32,  tst,  Register, AsmOperand)
 
-  COMMON_INSTR_2(ldr_float,   ldr_s,   flds,   FloatRegister, Address)
-  COMMON_INSTR_2(str_float,   str_s,   fsts,   FloatRegister, Address)
-  COMMON_INSTR_2(mov_float,   fmov_s,  fcpys,  FloatRegister, FloatRegister)
-  COMMON_INSTR_2(neg_float,   fneg_s,  fnegs,  FloatRegister, FloatRegister)
-  COMMON_INSTR_2(abs_float,   fabs_s,  fabss,  FloatRegister, FloatRegister)
-  COMMON_INSTR_2(sqrt_float,  fsqrt_s, fsqrts, FloatRegister, FloatRegister)
-  COMMON_INSTR_2(cmp_float,   fcmp_s,  fcmps,  FloatRegister, FloatRegister)
+  ARM_INSTR_3(and_32,  andr, Register, Register, uint)
+  ARM_INSTR_3(orr_32,  orr,  Register, Register, uint)
+  ARM_INSTR_3(eor_32,  eor,  Register, Register, uint)
 
-  COMMON_INSTR_3(add_float,   fadd_s,  fadds,  FloatRegister, FloatRegister, FloatRegister)
-  COMMON_INSTR_3(sub_float,   fsub_s,  fsubs,  FloatRegister, FloatRegister, FloatRegister)
-  COMMON_INSTR_3(mul_float,   fmul_s,  fmuls,  FloatRegister, FloatRegister, FloatRegister)
-  COMMON_INSTR_3(div_float,   fdiv_s,  fdivs,  FloatRegister, FloatRegister, FloatRegister)
+  ARM_INSTR_1(cmp_zero_float,  fcmpzs, FloatRegister)
+  ARM_INSTR_1(cmp_zero_double, fcmpzd, FloatRegister)
 
-  COMMON_INSTR_2(ldr_double,  ldr_d,   fldd,   FloatRegister, Address)
-  COMMON_INSTR_2(str_double,  str_d,   fstd,   FloatRegister, Address)
-  COMMON_INSTR_2(mov_double,  fmov_d,  fcpyd,  FloatRegister, FloatRegister)
-  COMMON_INSTR_2(neg_double,  fneg_d,  fnegd,  FloatRegister, FloatRegister)
-  COMMON_INSTR_2(cmp_double,  fcmp_d,  fcmpd,  FloatRegister, FloatRegister)
-  COMMON_INSTR_2(abs_double,  fabs_d,  fabsd,  FloatRegister, FloatRegister)
-  COMMON_INSTR_2(sqrt_double, fsqrt_d, fsqrtd, FloatRegister, FloatRegister)
+  ARM_INSTR_2(ldr_float,   flds,   FloatRegister, Address)
+  ARM_INSTR_2(str_float,   fsts,   FloatRegister, Address)
+  ARM_INSTR_2(mov_float,   fcpys,  FloatRegister, FloatRegister)
+  ARM_INSTR_2(neg_float,   fnegs,  FloatRegister, FloatRegister)
+  ARM_INSTR_2(abs_float,   fabss,  FloatRegister, FloatRegister)
+  ARM_INSTR_2(sqrt_float,  fsqrts, FloatRegister, FloatRegister)
+  ARM_INSTR_2(cmp_float,   fcmps,  FloatRegister, FloatRegister)
 
-  COMMON_INSTR_3(add_double,  fadd_d,  faddd,  FloatRegister, FloatRegister, FloatRegister)
-  COMMON_INSTR_3(sub_double,  fsub_d,  fsubd,  FloatRegister, FloatRegister, FloatRegister)
-  COMMON_INSTR_3(mul_double,  fmul_d,  fmuld,  FloatRegister, FloatRegister, FloatRegister)
-  COMMON_INSTR_3(div_double,  fdiv_d,  fdivd,  FloatRegister, FloatRegister, FloatRegister)
+  ARM_INSTR_3(add_float,   fadds,  FloatRegister, FloatRegister, FloatRegister)
+  ARM_INSTR_3(sub_float,   fsubs,  FloatRegister, FloatRegister, FloatRegister)
+  ARM_INSTR_3(mul_float,   fmuls,  FloatRegister, FloatRegister, FloatRegister)
+  ARM_INSTR_3(div_float,   fdivs,  FloatRegister, FloatRegister, FloatRegister)
 
-  COMMON_INSTR_2(convert_f2d, fcvt_ds, fcvtds, FloatRegister, FloatRegister)
-  COMMON_INSTR_2(convert_d2f, fcvt_sd, fcvtsd, FloatRegister, FloatRegister)
+  ARM_INSTR_2(ldr_double,  fldd,   FloatRegister, Address)
+  ARM_INSTR_2(str_double,  fstd,   FloatRegister, Address)
+  ARM_INSTR_2(mov_double,  fcpyd,  FloatRegister, FloatRegister)
+  ARM_INSTR_2(neg_double,  fnegd,  FloatRegister, FloatRegister)
+  ARM_INSTR_2(cmp_double,  fcmpd,  FloatRegister, FloatRegister)
+  ARM_INSTR_2(abs_double,  fabsd,  FloatRegister, FloatRegister)
+  ARM_INSTR_2(sqrt_double, fsqrtd, FloatRegister, FloatRegister)
 
-  COMMON_INSTR_2(mov_fpr2gpr_float, fmov_ws, fmrs, Register, FloatRegister)
+  ARM_INSTR_3(add_double,  faddd,  FloatRegister, FloatRegister, FloatRegister)
+  ARM_INSTR_3(sub_double,  fsubd,  FloatRegister, FloatRegister, FloatRegister)
+  ARM_INSTR_3(mul_double,  fmuld,  FloatRegister, FloatRegister, FloatRegister)
+  ARM_INSTR_3(div_double,  fdivd,  FloatRegister, FloatRegister, FloatRegister)
 
-#undef COMMON_INSTR_1
-#undef COMMON_INSTR_2
-#undef COMMON_INSTR_3
+  ARM_INSTR_2(convert_f2d, fcvtds, FloatRegister, FloatRegister)
+  ARM_INSTR_2(convert_d2f, fcvtsd, FloatRegister, FloatRegister)
 
+  ARM_INSTR_2(mov_fpr2gpr_float, fmrs, Register, FloatRegister)
 
-#ifdef AARCH64
-
-  void mov(Register dst, Register src, AsmCondition cond) {
-    if (cond == al) {
-      mov(dst, src);
-    } else {
-      csel(dst, src, dst, cond);
-    }
-  }
-
-  // Propagate other overloaded "mov" methods from Assembler.
-  void mov(Register dst, Register src)    { Assembler::mov(dst, src); }
-  void mov(Register rd, int imm)          { Assembler::mov(rd, imm);  }
-
-  void mov(Register dst, int imm, AsmCondition cond) {
-    assert(imm == 0 || imm == 1, "");
-    if (imm == 0) {
-      mov(dst, ZR, cond);
-    } else if (imm == 1) {
-      csinc(dst, dst, ZR, inverse(cond));
-    } else if (imm == -1) {
-      csinv(dst, dst, ZR, inverse(cond));
-    } else {
-      fatal("illegal mov(R%d,%d,cond)", dst->encoding(), imm);
-    }
-  }
+#undef ARM_INSTR_1
+#undef ARM_INSTR_2
+#undef ARM_INSTR_3
 
-  void movs(Register dst, Register src)    { adds(dst, src, 0); }
 
-#else // AARCH64
 
   void tbz(Register rt, int bit, Label& L) {
     assert(0 <= bit && bit < BitsPerWord, "bit number is out of range");
@@ -851,166 +764,91 @@ class MacroAssembler: public Assembler {
     bx(dst);
   }
 
-#endif // AARCH64
 
   Register zero_register(Register tmp) {
-#ifdef AARCH64
-    return ZR;
-#else
     mov(tmp, 0);
     return tmp;
-#endif
   }
 
   void logical_shift_left(Register dst, Register src, int shift) {
-#ifdef AARCH64
-    _lsl(dst, src, shift);
-#else
     mov(dst, AsmOperand(src, lsl, shift));
-#endif
   }
 
   void logical_shift_left_32(Register dst, Register src, int shift) {
-#ifdef AARCH64
-    _lsl_w(dst, src, shift);
-#else
     mov(dst, AsmOperand(src, lsl, shift));
-#endif
   }
 
   void logical_shift_right(Register dst, Register src, int shift) {
-#ifdef AARCH64
-    _lsr(dst, src, shift);
-#else
     mov(dst, AsmOperand(src, lsr, shift));
-#endif
   }
 
   void arith_shift_right(Register dst, Register src, int shift) {
-#ifdef AARCH64
-    _asr(dst, src, shift);
-#else
     mov(dst, AsmOperand(src, asr, shift));
-#endif
   }
 
   void asr_32(Register dst, Register src, int shift) {
-#ifdef AARCH64
-    _asr_w(dst, src, shift);
-#else
     mov(dst, AsmOperand(src, asr, shift));
-#endif
   }
 
   // If <cond> holds, compares r1 and r2. Otherwise, flags are set so that <cond> does not hold.
   void cond_cmp(Register r1, Register r2, AsmCondition cond) {
-#ifdef AARCH64
-    ccmp(r1, r2, flags_for_condition(inverse(cond)), cond);
-#else
     cmp(r1, r2, cond);
-#endif
   }
 
   // If <cond> holds, compares r and imm. Otherwise, flags are set so that <cond> does not hold.
   void cond_cmp(Register r, int imm, AsmCondition cond) {
-#ifdef AARCH64
-    ccmp(r, imm, flags_for_condition(inverse(cond)), cond);
-#else
     cmp(r, imm, cond);
-#endif
   }
 
   void align_reg(Register dst, Register src, int align) {
     assert (is_power_of_2(align), "should be");
-#ifdef AARCH64
-    andr(dst, src, ~(uintx)(align-1));
-#else
     bic(dst, src, align-1);
-#endif
   }
 
   void prefetch_read(Address addr) {
-#ifdef AARCH64
-    prfm(pldl1keep, addr);
-#else
     pld(addr);
-#endif
   }
 
   void raw_push(Register r1, Register r2) {
-#ifdef AARCH64
-    stp(r1, r2, Address(SP, -2*wordSize, pre_indexed));
-#else
     assert(r1->encoding() < r2->encoding(), "should be ordered");
     push(RegisterSet(r1) | RegisterSet(r2));
-#endif
   }
 
   void raw_pop(Register r1, Register r2) {
-#ifdef AARCH64
-    ldp(r1, r2, Address(SP, 2*wordSize, post_indexed));
-#else
     assert(r1->encoding() < r2->encoding(), "should be ordered");
     pop(RegisterSet(r1) | RegisterSet(r2));
-#endif
   }
 
   void raw_push(Register r1, Register r2, Register r3) {
-#ifdef AARCH64
-    raw_push(r1, r2);
-    raw_push(r3, ZR);
-#else
     assert(r1->encoding() < r2->encoding() && r2->encoding() < r3->encoding(), "should be ordered");
     push(RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3));
-#endif
   }
 
   void raw_pop(Register r1, Register r2, Register r3) {
-#ifdef AARCH64
-    raw_pop(r3, ZR);
-    raw_pop(r1, r2);
-#else
     assert(r1->encoding() < r2->encoding() && r2->encoding() < r3->encoding(), "should be ordered");
     pop(RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3));
-#endif
   }
 
   // Restores registers r1 and r2 previously saved by raw_push(r1, r2, ret_addr) and returns by ret_addr. Clobbers LR.
   void raw_pop_and_ret(Register r1, Register r2) {
-#ifdef AARCH64
-    raw_pop(r1, r2, LR);
-    ret();
-#else
     raw_pop(r1, r2, PC);
-#endif
   }
 
   void indirect_jump(Address addr, Register scratch) {
-#ifdef AARCH64
-    ldr(scratch, addr);
-    br(scratch);
-#else
     ldr(PC, addr);
-#endif
   }
 
   void indirect_jump(InlinedAddress& literal, Register scratch) {
-#ifdef AARCH64
-    ldr_literal(scratch, literal);
-    br(scratch);
-#else
     ldr_literal(PC, literal);
-#endif
   }
 
-#ifndef AARCH64
   void neg(Register dst, Register src) {
     rsb(dst, src, 0);
   }
-#endif
 
   void branch_if_negative_32(Register r, Label& L) {
-    // Note about branch_if_negative_32() / branch_if_any_negative_32() implementation for AArch64:
+    // TODO: This function and branch_if_any_negative_32 could possibly be
+    // revised after the aarch64 removal; the note below was written for AArch64.
     // tbnz is not used instead of tst & b.mi because destination may be out of tbnz range (+-32KB)
     // since these methods are used in LIR_Assembler::emit_arraycopy() to jump to stub entry.
     tst_32(r, r);
@@ -1018,40 +856,22 @@ class MacroAssembler: public Assembler {
   }
 
   void branch_if_any_negative_32(Register r1, Register r2, Register tmp, Label& L) {
-#ifdef AARCH64
-    orr_32(tmp, r1, r2);
-    tst_32(tmp, tmp);
-#else
     orrs(tmp, r1, r2);
-#endif
     b(L, mi);
   }
 
   void branch_if_any_negative_32(Register r1, Register r2, Register r3, Register tmp, Label& L) {
     orr_32(tmp, r1, r2);
-#ifdef AARCH64
-    orr_32(tmp, tmp, r3);
-    tst_32(tmp, tmp);
-#else
     orrs(tmp, tmp, r3);
-#endif
     b(L, mi);
   }
 
   void add_ptr_scaled_int32(Register dst, Register r1, Register r2, int shift) {
-#ifdef AARCH64
-      add(dst, r1, r2, ex_sxtw, shift);
-#else
       add(dst, r1, AsmOperand(r2, lsl, shift));
-#endif
   }
 
   void sub_ptr_scaled_int32(Register dst, Register r1, Register r2, int shift) {
-#ifdef AARCH64
-    sub(dst, r1, r2, ex_sxtw, shift);
-#else
     sub(dst, r1, AsmOperand(r2, lsl, shift));
-#endif
   }
 
   // C 'boolean' to Java boolean: x == 0 ? 0 : 1
@@ -1059,18 +879,10 @@ class MacroAssembler: public Assembler {
 
     // klass oop manipulations if compressed
 
-#ifdef AARCH64
-  void load_klass(Register dst_klass, Register src_oop);
-#else
   void load_klass(Register dst_klass, Register src_oop, AsmCondition cond = al);
-#endif // AARCH64
 
   void store_klass(Register src_klass, Register dst_oop);
 
-#ifdef AARCH64
-  void store_klass_gap(Register dst);
-#endif // AARCH64
-
     // oop manipulations
 
   void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0);
@@ -1080,39 +892,6 @@ class MacroAssembler: public Assembler {
   void access_load_at(BasicType type, DecoratorSet decorators, Address src, Register dst, Register tmp1, Register tmp2, Register tmp3);
   void access_store_at(BasicType type, DecoratorSet decorators, Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null);
 
-#ifdef AARCH64
-  void encode_heap_oop(Register dst, Register src);
-  void encode_heap_oop(Register r) {
-    encode_heap_oop(r, r);
-  }
-  void decode_heap_oop(Register dst, Register src);
-  void decode_heap_oop(Register r) {
-      decode_heap_oop(r, r);
-  }
-
-#ifdef COMPILER2
-  void encode_heap_oop_not_null(Register dst, Register src);
-  void decode_heap_oop_not_null(Register dst, Register src);
-
-  void set_narrow_klass(Register dst, Klass* k);
-  void set_narrow_oop(Register dst, jobject obj);
-#endif
-
-  void encode_klass_not_null(Register r);
-  void encode_klass_not_null(Register dst, Register src);
-  void decode_klass_not_null(Register r);
-  void decode_klass_not_null(Register dst, Register src);
-
-  void reinit_heapbase();
-
-#ifdef ASSERT
-  void verify_heapbase(const char* msg);
-#endif // ASSERT
-
-  static int instr_count_for_mov_slow(intptr_t c);
-  static int instr_count_for_mov_slow(address addr);
-  static int instr_count_for_decode_klass_not_null();
-#endif // AARCH64
 
   void ldr_global_ptr(Register reg, address address_of_global);
   void ldr_global_s32(Register reg, address address_of_global);
@@ -1128,12 +907,7 @@ class MacroAssembler: public Assembler {
 
     assert ((offset() & (wordSize-1)) == 0, "should be aligned by word size");
 
-#ifdef AARCH64
-    emit_int32(address_placeholder_instruction);
-    emit_int32(address_placeholder_instruction);
-#else
     AbstractAssembler::emit_address((address)address_placeholder_instruction);
-#endif
   }
 
   void b(address target, AsmCondition cond = al) {
@@ -1144,15 +918,14 @@ class MacroAssembler: public Assembler {
     Assembler::b(target(L), cond);
   }
 
-  void bl(address target NOT_AARCH64_ARG(AsmCondition cond = al)) {
-    Assembler::bl(target NOT_AARCH64_ARG(cond));
+  void bl(address target, AsmCondition cond = al) {
+    Assembler::bl(target, cond);
   }
-  void bl(Label& L NOT_AARCH64_ARG(AsmCondition cond = al)) {
+  void bl(Label& L, AsmCondition cond = al) {
     // internal calls
-    Assembler::bl(target(L)  NOT_AARCH64_ARG(cond));
+    Assembler::bl(target(L), cond);
   }
 
-#ifndef AARCH64
   void adr(Register dest, Label& L, AsmCondition cond = al) {
     int delta = target(L) - pc() - 8;
     if (delta >= 0) {
@@ -1161,7 +934,6 @@ class MacroAssembler: public Assembler {
       sub(dest, PC, -delta, cond);
     }
   }
-#endif // !AARCH64
 
   // Variable-length jump and calls. We now distinguish only the
   // patchable case from the other cases. Patchable must be
@@ -1185,30 +957,23 @@ class MacroAssembler: public Assembler {
   // specified to allow future optimizations.
   void jump(address target,
             relocInfo::relocType rtype = relocInfo::runtime_call_type,
-            Register scratch = AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg)
-#ifndef AARCH64
-            , AsmCondition cond = al
-#endif
-            );
+            Register scratch = noreg, AsmCondition cond = al);
 
   void call(address target,
-            RelocationHolder rspec
-            NOT_AARCH64_ARG(AsmCondition cond = al));
+            RelocationHolder rspec, AsmCondition cond = al);
 
   void call(address target,
-            relocInfo::relocType rtype = relocInfo::runtime_call_type
-            NOT_AARCH64_ARG(AsmCondition cond = al)) {
-    call(target, Relocation::spec_simple(rtype) NOT_AARCH64_ARG(cond));
+            relocInfo::relocType rtype = relocInfo::runtime_call_type,
+            AsmCondition cond = al) {
+    call(target, Relocation::spec_simple(rtype), cond);
   }
 
   void jump(AddressLiteral dest) {
     jump(dest.target(), dest.reloc());
   }
-#ifndef AARCH64
   void jump(address dest, relocInfo::relocType rtype, AsmCondition cond) {
     jump(dest, rtype, Rtemp, cond);
   }
-#endif
 
   void call(AddressLiteral dest) {
     call(dest.target(), dest.reloc());
@@ -1226,10 +991,7 @@ class MacroAssembler: public Assembler {
   // specified to allow future optimizations.
   void patchable_jump(address target,
                       relocInfo::relocType rtype = relocInfo::runtime_call_type,
-                      Register scratch = AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg)
-#ifndef AARCH64
-                      , AsmCondition cond = al
-#endif
+                      Register scratch = noreg, AsmCondition cond = al
                       );
 
   // patchable_call may scratch Rtemp
@@ -1243,13 +1005,7 @@ class MacroAssembler: public Assembler {
     return patchable_call(target, Relocation::spec_simple(rtype), c2);
   }
 
-#if defined(AARCH64) && defined(COMPILER2)
-  static int call_size(address target, bool far, bool patchable);
-#endif
 
-#ifdef AARCH64
-  static bool page_reachable_from_cache(address target);
-#endif
   static bool _reachable_from_cache(address target);
   static bool _cache_fully_reachable();
   bool cache_fully_reachable();
@@ -1259,15 +1015,8 @@ class MacroAssembler: public Assembler {
   void sign_extend(Register rd, Register rn, int bits);
 
   inline void zap_high_non_significant_bits(Register r) {
-#ifdef AARCH64
-    if(ZapHighNonSignificantBits) {
-      movk(r, 0xBAAD, 48);
-      movk(r, 0xF00D, 32);
-    }
-#endif
   }
 
-#ifndef AARCH64
   void long_move(Register rd_lo, Register rd_hi,
                  Register rn_lo, Register rn_hi,
                  AsmCondition cond = al);
@@ -1281,7 +1030,6 @@ class MacroAssembler: public Assembler {
   void atomic_cas(Register tmpreg1, Register tmpreg2, Register oldval, Register newval, Register base, int offset);
   void atomic_cas_bool(Register oldval, Register newval, Register base, int offset, Register tmpreg);
   void atomic_cas64(Register temp_lo, Register temp_hi, Register temp_result, Register oldval_lo, Register oldval_hi, Register newval_lo, Register newval_hi, Register base, int offset);
-#endif // !AARCH64
 
   void cas_for_lock_acquire(Register oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false);
   void cas_for_lock_release(Register oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false);
@@ -1304,14 +1052,9 @@ class MacroAssembler: public Assembler {
   // size must not exceed wordSize (i.e. 8-byte values are not supported on 32-bit ARM);
   // each of these calls generates exactly one load or store instruction,
   // so src can be pre- or post-indexed address.
-#ifdef AARCH64
-  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
-  void store_sized_value(Register src, Address dst, size_t size_in_bytes);
-#else
   // 32-bit ARM variants also support conditional execution
   void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, AsmCondition cond = al);
   void store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond = al);
-#endif
 
   void lookup_interface_method(Register recv_klass,
                                Register intf_klass,
@@ -1333,11 +1076,7 @@ class MacroAssembler: public Assembler {
 
   void ldr_literal(Register rd, AddressLiteral addr) {
     relocate(addr.rspec());
-#ifdef AARCH64
-    ldr(rd, addr.target());
-#else
     ldr(rd, Address(PC, addr.target() - pc() - 8));
-#endif
   }
 
   void lea(Register Rd, AddressLiteral addr) {
@@ -1350,43 +1089,8 @@ class MacroAssembler: public Assembler {
 #ifdef COMPILER2
   // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
   void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3 = noreg);
-#ifdef AARCH64
-  void fast_unlock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3);
-#else
   void fast_unlock(Register obj, Register box, Register scratch, Register scratch2);
 #endif
-#endif
-
-#ifdef AARCH64
-
-#define F(mnemonic)                                             \
-  void mnemonic(Register rt, address target) {                  \
-    Assembler::mnemonic(rt, target);                            \
-  }                                                             \
-  void mnemonic(Register rt, Label& L) {                        \
-    Assembler::mnemonic(rt, target(L));                         \
-  }
-
-  F(cbz_w);
-  F(cbnz_w);
-  F(cbz);
-  F(cbnz);
-
-#undef F
-
-#define F(mnemonic)                                             \
-  void mnemonic(Register rt, int bit, address target) {         \
-    Assembler::mnemonic(rt, bit, target);                       \
-  }                                                             \
-  void mnemonic(Register rt, int bit, Label& L) {               \
-    Assembler::mnemonic(rt, bit, target(L));                    \
-  }
-
-  F(tbz);
-  F(tbnz);
-#undef F
-
-#endif // AARCH64
 
 };
 
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.inline.hpp b/src/hotspot/cpu/arm/macroAssembler_arm.inline.hpp
index 14c844ce84d..5e19e22ccbe 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.inline.hpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.inline.hpp
@@ -32,46 +32,9 @@
 
 inline void MacroAssembler::pd_patch_instruction(address branch, address target) {
   int instr = *(int*)branch;
-  int new_offset = (int)(target - branch NOT_AARCH64(- 8));
+  int new_offset = (int)(target - branch - 8);
   assert((new_offset & 3) == 0, "bad alignment");
 
-#ifdef AARCH64
-  if ((instr & (0x1f << 26)) == (0b00101 << 26)) {
-    // Unconditional B or BL
-    assert (is_offset_in_range(new_offset, 26), "offset is too large");
-    *(int*)branch = (instr & ~right_n_bits(26)) | encode_offset(new_offset, 26, 0);
-  } else if ((instr & (0xff << 24)) == (0b01010100 << 24) && (instr & (1 << 4)) == 0) {
-    // Conditional B
-    assert (is_offset_in_range(new_offset, 19), "offset is too large");
-    *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5);
-  } else if ((instr & (0b111111 << 25)) == (0b011010 << 25)) {
-    // Compare & branch CBZ/CBNZ
-    assert (is_offset_in_range(new_offset, 19), "offset is too large");
-    *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5);
-  } else if ((instr & (0b111111 << 25)) == (0b011011 << 25)) {
-    // Test & branch TBZ/TBNZ
-    assert (is_offset_in_range(new_offset, 14), "offset is too large");
-    *(int*)branch = (instr & ~(right_n_bits(14) << 5)) | encode_offset(new_offset, 14, 5);
-  } else if ((instr & (0b111011 << 24)) == (0b011000 << 24)) {
-    // LDR (literal)
-    unsigned opc = ((unsigned)instr >> 30);
-    assert (opc != 0b01 || ((uintx)target & 7) == 0, "ldr target should be aligned");
-    assert (is_offset_in_range(new_offset, 19), "offset is too large");
-    *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5);
-  } else if (((instr & (1 << 31)) == 0) && ((instr & (0b11111 << 24)) == (0b10000 << 24))) {
-    // ADR
-    assert (is_imm_in_range(new_offset, 21, 0), "offset is too large");
-    instr = (instr & ~(right_n_bits(2) << 29)) | (new_offset & 3) << 29;
-    *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_imm(new_offset >> 2, 19, 0, 5);
-  } else if((unsigned int)instr == address_placeholder_instruction) {
-    // address
-    assert (*(unsigned int *)(branch + InstructionSize) == address_placeholder_instruction, "address placeholder occupies two instructions");
-    *(intx*)branch = (intx)target;
-  } else {
-    ::tty->print_cr("=============== instruction: 0x%x ================\n", instr);
-    Unimplemented(); // TODO-AARCH64
-  }
-#else
   if ((instr & 0x0e000000) == 0x0a000000) {
     // B or BL instruction
     assert(new_offset < 0x2000000 && new_offset > -0x2000000, "encoding constraint");
@@ -98,7 +61,6 @@ inline void MacroAssembler::pd_patch_instruction(address branch, address target)
       *(int*)branch = (instr & 0xff0ff000) | 1 << 20 | -new_offset;
     }
   }
-#endif // AARCH64
 }
 
 #endif // CPU_ARM_VM_MACROASSEMBLER_ARM_INLINE_HPP
diff --git a/src/hotspot/cpu/arm/methodHandles_arm.cpp b/src/hotspot/cpu/arm/methodHandles_arm.cpp
index d4d1bf4abb7..bd2103ff34a 100644
--- a/src/hotspot/cpu/arm/methodHandles_arm.cpp
+++ b/src/hotspot/cpu/arm/methodHandles_arm.cpp
@@ -125,15 +125,8 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, bool for_comp
     // compiled code in threads for which the event is enabled.  Check here for
     // interp_only_mode if these events CAN be enabled.
     __ ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
-#ifdef AARCH64
-    Label L;
-    __ cbz(Rtemp, L);
-    __ indirect_jump(Address(Rmethod, Method::interpreter_entry_offset()), Rtemp);
-    __ bind(L);
-#else
     __ cmp(Rtemp, 0);
     __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()), ne);
-#endif // AARCH64
   }
   const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() :
                                                      Method::from_interpreted_offset();
@@ -268,11 +261,7 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler*
       DEBUG_ONLY(rdx_param_size = noreg);
     }
     Register rbx_member = rbx_method;  // MemberName ptr; incoming method ptr is dead now
-#ifdef AARCH64
-    __ ldr(rbx_member, Address(Rparams, Interpreter::stackElementSize, post_indexed));
-#else
     __ pop(rbx_member);
-#endif
     generate_method_handle_dispatch(_masm, iid, rcx_recv, rbx_member, not_for_compiler_entry);
   }
   return entry_point;
@@ -288,22 +277,15 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
   Register rbx_method = Rmethod;   // eventual target of this invocation
   // temps used in this code are not used in *either* compiled or interpreted calling sequences
   Register temp1 = (for_compiler_entry ? saved_last_sp_register() : R1_tmp);
-  Register temp2 = AARCH64_ONLY(R9) NOT_AARCH64(R8);
+  Register temp2 = R8;
   Register temp3 = Rtemp; // R12/R16
-  Register temp4 = AARCH64_ONLY(Rtemp2) NOT_AARCH64(R5);
+  Register temp4 = R5;
   if (for_compiler_entry) {
     assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment");
-#ifdef AARCH64
-    assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
-    assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
-    assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
-    assert_different_registers(temp4, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
-#else
     assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3);
     assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3);
     assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3);
     assert_different_registers(temp4, j_rarg0, j_rarg1, j_rarg2, j_rarg3);
-#endif // AARCH64
   }
   assert_different_registers(temp1, temp2, temp3, receiver_reg);
   assert_different_registers(temp1, temp2, temp3, temp4, member_reg);
@@ -353,12 +335,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
         __ load_heap_oop(temp2_defc, member_clazz);
         load_klass_from_Class(_masm, temp2_defc, temp3, temp4);
         __ verify_klass_ptr(temp2_defc);
-#ifdef AARCH64
-        // TODO-AARCH64
-        __ b(L_ok);
-#else
         __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, noreg, L_ok);
-#endif
         // If we get here, the type check failed!
         __ stop("receiver class disagrees with MemberName.clazz");
         __ bind(L_ok);
@@ -485,13 +462,9 @@ enum {
   // the slop defends against false alarms due to fencepost errors
 };
 
-#ifdef AARCH64
-const int trace_mh_nregs = 32; // R0-R30, PC
-#else
 const int trace_mh_nregs = 15;
 const Register trace_mh_regs[trace_mh_nregs] =
   {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
-#endif // AARCH64
 
 void trace_method_handle_stub(const char* adaptername,
                               intptr_t* saved_regs,
@@ -502,7 +475,7 @@ void trace_method_handle_stub(const char* adaptername,
                  strstr(adaptername, "linkTo") == NULL);    // static linkers don't have MH
   intptr_t* entry_sp = (intptr_t*) &saved_regs[trace_mh_nregs]; // just after the saved regs
   intptr_t* saved_sp = (intptr_t*)  saved_regs[Rsender_sp->encoding()]; // save of Rsender_sp
-  intptr_t* last_sp  = (intptr_t*)  saved_bp[AARCH64_ONLY(frame::interpreter_frame_stack_top_offset) NOT_AARCH64(frame::interpreter_frame_last_sp_offset)];
+  intptr_t* last_sp  = (intptr_t*)  saved_bp[frame::interpreter_frame_last_sp_offset];
   intptr_t* base_sp  = last_sp;
 
   intptr_t    mh_reg = (intptr_t)saved_regs[R5_mh->encoding()];
@@ -518,13 +491,9 @@ void trace_method_handle_stub(const char* adaptername,
     tty->print(" reg dump: ");
     int i;
     for (i = 0; i < trace_mh_nregs; i++) {
-      if (i > 0 && i % AARCH64_ONLY(2) NOT_AARCH64(4) == 0)
+      if (i > 0 && i % 4 == 0)
         tty->print("\n   + dump: ");
-#ifdef AARCH64
-      const char* reg_name = (i == trace_mh_nregs-1) ? "pc" : as_Register(i)->name();
-#else
       const char* reg_name = trace_mh_regs[i]->name();
-#endif
       tty->print(" %s: " INTPTR_FORMAT, reg_name, p2i((void *)saved_regs[i]));
     }
     tty->cr();
diff --git a/src/hotspot/cpu/arm/nativeInst_arm.hpp b/src/hotspot/cpu/arm/nativeInst_arm.hpp
index 1b60ed06f3f..8240ab04ce1 100644
--- a/src/hotspot/cpu/arm/nativeInst_arm.hpp
+++ b/src/hotspot/cpu/arm/nativeInst_arm.hpp
@@ -30,11 +30,7 @@
 #include "runtime/os.hpp"
 
 
-#ifdef AARCH64
-#include "nativeInst_arm_64.hpp"
-#else
 #include "nativeInst_arm_32.hpp"
-#endif
 
 
 #endif // CPU_ARM_VM_NATIVEINST_ARM_HPP
diff --git a/src/hotspot/cpu/arm/nativeInst_arm_64.cpp b/src/hotspot/cpu/arm/nativeInst_arm_64.cpp
deleted file mode 100644
index 3f614d8519a..00000000000
--- a/src/hotspot/cpu/arm/nativeInst_arm_64.cpp
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "assembler_arm.inline.hpp"
-#include "code/codeCache.hpp"
-#include "memory/resourceArea.hpp"
-#include "nativeInst_arm.hpp"
-#include "oops/compressedOops.inline.hpp"
-#include "oops/klass.inline.hpp"
-#include "oops/oop.hpp"
-#include "runtime/handles.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "runtime/stubRoutines.hpp"
-#include "utilities/ostream.hpp"
-#ifdef COMPILER1
-#include "c1/c1_Runtime1.hpp"
-#endif
-
-void RawNativeInstruction::verify() {
-  // make sure code pattern is actually an instruction address
-  address addr = instruction_address();
-  if (addr == NULL || ((intptr_t)addr & (instruction_size - 1)) != 0) {
-    fatal("not an instruction address");
-  }
-}
-
-void NativeMovRegMem::set_offset(int x) {
-  int scale = get_offset_scale();
-  assert((x & right_n_bits(scale)) == 0, "offset should be aligned");
-  guarantee((x >> 24) == 0, "encoding constraint");
-
-  if (Assembler::is_unsigned_imm_in_range(x, 12, scale)) {
-    set_unsigned_imm(x, 12, get_offset_scale(), 10);
-    return;
-  }
-
-  // If offset is too large to be placed into single ldr/str instruction, we replace
-  //   ldr/str  Rt, [Rn, #offset]
-  //   nop
-  // with
-  //   add  LR, Rn, #offset_hi
-  //   ldr/str  Rt, [LR, #offset_lo]
-
-  // Note: Rtemp cannot be used as a temporary register as it could be used
-  // for value being stored (see LIR_Assembler::reg2mem).
-  // Patchable NativeMovRegMem instructions are generated in LIR_Assembler::mem2reg and LIR_Assembler::reg2mem
-  // which do not use LR, so it is free. Also, it does not conflict with LR usages in c1_LIRGenerator_arm.cpp.
-  const int tmp = LR->encoding();
-  const int rn = (encoding() >> 5) & 0x1f;
-
-  NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address());
-  assert(next->is_nop(), "must be");
-
-  next->set_encoding((encoding() & 0xffc0001f) | Assembler::encode_unsigned_imm((x & 0xfff), 12, scale, 10) | tmp << 5);
-  this->set_encoding(0x91400000 | Assembler::encode_unsigned_imm((x >> 12), 12, 0, 10) | rn << 5 | tmp);
-}
-
-intptr_t NativeMovConstReg::_data() const {
-#ifdef COMPILER2
-  if (is_movz()) {
-    // narrow constant or ic call cached value
-    RawNativeInstruction* ni = next_raw();
-    assert(ni->is_movk(), "movz;movk expected");
-    uint lo16 = (encoding() >> 5) & 0xffff;
-    intptr_t hi = 0;
-    int i = 0;
-    while (ni->is_movk() && i < 3) {
-      uint hi16 = (ni->encoding() >> 5) & 0xffff;
-      int shift = ((ni->encoding() >> 21) & 0x3) << 4;
-      hi |= (intptr_t)hi16 << shift;
-      ni = ni->next_raw();
-      ++i;
-    }
-    return lo16 | hi;
-  }
-#endif
-  return (intptr_t)(nativeLdrLiteral_at(instruction_address())->literal_value());
-}
-
-static void raw_set_data(RawNativeInstruction* si, intptr_t x, oop* oop_addr, Metadata** metadata_addr) {
-#ifdef COMPILER2
-  if (si->is_movz()) {
-    // narrow constant or ic call cached value
-    uintptr_t nx = 0;
-    int val_size = 32;
-    if (oop_addr != NULL) {
-      narrowOop encoded_oop = CompressedOops::encode(*oop_addr);
-      nx = encoded_oop;
-    } else if (metadata_addr != NULL) {
-      assert((*metadata_addr)->is_klass(), "expected Klass");
-      narrowKlass encoded_k = Klass::encode_klass((Klass *)*metadata_addr);
-      nx = encoded_k;
-    } else {
-      nx = x;
-      val_size = 64;
-    }
-    RawNativeInstruction* ni = si->next_raw();
-    uint lo16 = nx & 0xffff;
-    int shift = 16;
-    int imm16 = 0xffff << 5;
-    si->set_encoding((si->encoding() & ~imm16) | (lo16 << 5));
-    while (shift < val_size) {
-      assert(ni->is_movk(), "movk expected");
-      assert((((ni->encoding() >> 21) & 0x3) << 4) == shift, "wrong shift");
-      uint hi16 = (nx >> shift) & 0xffff;
-      ni->set_encoding((ni->encoding() & ~imm16) | (hi16 << 5));
-      shift += 16;
-      ni = ni->next_raw();
-    }
-    return;
-  }
-#endif
-
-  assert(si->is_ldr_literal(), "should be");
-
-  if (oop_addr == NULL && metadata_addr == NULL) {
-    // A static ldr_literal without oop_relocation
-    nativeLdrLiteral_at(si->instruction_address())->set_literal_value((address)x);
-  } else {
-    // Oop is loaded from oops section
-    address addr = oop_addr != NULL ? (address)oop_addr : (address)metadata_addr;
-    int offset = addr - si->instruction_address();
-
-    assert((((intptr_t)addr) & 0x7) == 0, "target address should be aligned");
-    assert((offset & 0x3) == 0, "offset should be aligned");
-
-    guarantee(Assembler::is_offset_in_range(offset, 19), "offset is not in range");
-    nativeLdrLiteral_at(si->instruction_address())->set_literal_address(si->instruction_address() + offset);
-  }
-}
-
-void NativeMovConstReg::set_data(intptr_t x) {
-  // Find and replace the oop corresponding to this instruction in oops section
-  oop* oop_addr = NULL;
-  Metadata** metadata_addr = NULL;
-  CodeBlob* cb = CodeCache::find_blob(instruction_address());
-  {
-    nmethod* nm = cb->as_nmethod_or_null();
-    if (nm != NULL) {
-      RelocIterator iter(nm, instruction_address(), next_raw()->instruction_address());
-      while (iter.next()) {
-        if (iter.type() == relocInfo::oop_type) {
-          oop_addr = iter.oop_reloc()->oop_addr();
-          *oop_addr = cast_to_oop(x);
-          break;
-        } else if (iter.type() == relocInfo::metadata_type) {
-          metadata_addr = iter.metadata_reloc()->metadata_addr();
-          *metadata_addr = (Metadata*)x;
-          break;
-        }
-      }
-    }
-  }
-  raw_set_data(adjust(this), x, oop_addr,  metadata_addr);
-}
-
-void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) {
-}
-
-void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
-  assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "should be");
-
-  NativeInstruction* instr = nativeInstruction_at(verified_entry);
-  assert(instr->is_nop() || instr->encoding() == zombie_illegal_instruction, "required for MT-safe patching");
-  instr->set_encoding(zombie_illegal_instruction);
-}
-
-void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
-  assert (nativeInstruction_at(instr_addr)->is_b(), "MT-safe patching of arbitrary instructions is not allowed");
-  assert (nativeInstruction_at(code_buffer)->is_nop(), "MT-safe patching of arbitrary instructions is not allowed");
-  nativeInstruction_at(instr_addr)->set_encoding(*(int*)code_buffer);
-}
-
-void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
-  // Insert at code_pos unconditional B instruction jumping to entry
-  intx offset = entry - code_pos;
-  assert (Assembler::is_offset_in_range(offset, 26), "offset is out of range");
-
-  NativeInstruction* instr = nativeInstruction_at(code_pos);
-  assert (instr->is_b() || instr->is_nop(), "MT-safe patching of arbitrary instructions is not allowed");
-
-  instr->set_encoding(0x5 << 26 | Assembler::encode_offset(offset, 26, 0));
-}
-
-static address call_for(address return_address) {
-  CodeBlob* cb = CodeCache::find_blob(return_address);
-  nmethod* nm = cb->as_nmethod_or_null();
-  if (nm == NULL) {
-    ShouldNotReachHere();
-    return NULL;
-  }
-
-  // Look back 8 instructions (for LIR_Assembler::ic_call and MacroAssembler::patchable_call)
-  address begin = return_address - 8*NativeInstruction::instruction_size;
-  if (begin < nm->code_begin()) {
-    begin = nm->code_begin();
-  }
-  RelocIterator iter(nm, begin, return_address);
-  while (iter.next()) {
-    Relocation* reloc = iter.reloc();
-    if (reloc->is_call()) {
-      address call = reloc->addr();
-      if (nativeInstruction_at(call)->is_call()) {
-        if (nativeCall_at(call)->return_address() == return_address) {
-          return call;
-        }
-      }
-    }
-  }
-
-  return NULL;
-}
-
-bool NativeCall::is_call_before(address return_address) {
-  return (call_for(return_address) != NULL);
-}
-
-NativeCall* nativeCall_before(address return_address) {
-  assert(NativeCall::is_call_before(return_address), "must be");
-  return nativeCall_at(call_for(return_address));
-}
diff --git a/src/hotspot/cpu/arm/nativeInst_arm_64.hpp b/src/hotspot/cpu/arm/nativeInst_arm_64.hpp
deleted file mode 100644
index 424d862404c..00000000000
--- a/src/hotspot/cpu/arm/nativeInst_arm_64.hpp
+++ /dev/null
@@ -1,771 +0,0 @@
-/*
- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef CPU_ARM_VM_NATIVEINST_ARM_64_HPP
-#define CPU_ARM_VM_NATIVEINST_ARM_64_HPP
-
-#include "asm/macroAssembler.hpp"
-#include "code/codeCache.hpp"
-#include "runtime/icache.hpp"
-#include "runtime/os.hpp"
-
-// -------------------------------------------------------------------
-
-// Some experimental projects extend the ARM back-end by implementing
-// what the front-end usually assumes is a single native instruction
-// with a sequence of instructions.
-//
-// The 'Raw' variants are the low level initial code (usually one
-// instruction wide but some of them were already composed
-// instructions). They should be used only by the back-end.
-//
-// The non-raw classes are the front-end entry point, hiding potential
-// back-end extensions or the actual instructions size.
-class NativeInstruction;
-
-class RawNativeInstruction {
- public:
-
-  enum ARM_specific {
-    instruction_size = Assembler::InstructionSize,
-    instruction_size_in_bits = instruction_size * BitsPerByte,
-  };
-
-  // illegal instruction used by NativeJump::patch_verified_entry
-  static const int zombie_illegal_instruction = 0xd4000542; // hvc #42
-
-  address addr_at(int offset)        const { return (address)this + offset; }
-  address instruction_address()      const { return addr_at(0); }
-  address next_raw_instruction_address() const { return addr_at(instruction_size); }
-
-  static RawNativeInstruction* at(address address) {
-    return (RawNativeInstruction*)address;
-  }
-
-  RawNativeInstruction* next_raw() const {
-    return at(next_raw_instruction_address());
-  }
-
-  int encoding() const {
-    return *(int*)this;
-  }
-
-  void set_encoding(int value) {
-    int old = encoding();
-    if (old != value) {
-      *(int*)this = value;
-      ICache::invalidate_word((address)this);
-    }
-  }
-
-  bool is_nop()                      const { return encoding() == (int)0xd503201f; }
-  bool is_b()                        const { return (encoding() & 0xfc000000) == 0x14000000; } // unconditional branch
-  bool is_b_cond()                   const { return (encoding() & 0xff000010) == 0x54000000; } // conditional branch
-  bool is_bl()                       const { return (encoding() & 0xfc000000) == 0x94000000; }
-  bool is_br()                       const { return (encoding() & 0xfffffc1f) == 0xd61f0000; }
-  bool is_blr()                      const { return (encoding() & 0xfffffc1f) == 0xd63f0000; }
-  bool is_ldr_literal()              const { return (encoding() & 0xff000000) == 0x58000000; }
-  bool is_adr_aligned()              const { return (encoding() & 0xff000000) == 0x10000000; } // adr Xn, <label>, where label is aligned to 4 bytes (address of instruction).
-  bool is_adr_aligned_lr()           const { return (encoding() & 0xff00001f) == 0x1000001e; } // adr LR, <label>, where label is aligned to 4 bytes (address of instruction).
-
-  bool is_ldr_str_gp_reg_unsigned_imm()   const { return (encoding() & 0x3f000000) == 0x39000000; } // ldr/str{b, sb, h, sh, _w, sw} Rt, [Rn, #imm]
-  bool is_ldr_str_fp_reg_unsigned_imm()   const { return (encoding() & 0x3f000000) == 0x3D000000; } // ldr/str Rt(SIMD), [Rn, #imm]
-  bool is_ldr_str_reg_unsigned_imm()      const { return is_ldr_str_gp_reg_unsigned_imm() || is_ldr_str_fp_reg_unsigned_imm(); }
-
-  bool is_stp_preindex()             const { return (encoding() & 0xffc00000) == 0xa9800000; } // stp Xt1, Xt2, [Xn, #imm]!
-  bool is_ldp_postindex()            const { return (encoding() & 0xffc00000) == 0xa8c00000; } // ldp Xt1, Xt2, [Xn] #imm
-  bool is_mov_sp()                   const { return (encoding() & 0xfffffc00) == 0x91000000; } // mov <Xn|SP>, <Xm|SP>
-  bool is_movn()                     const { return (encoding() & 0x7f800000) == 0x12800000; }
-  bool is_movz()                     const { return (encoding() & 0x7f800000) == 0x52800000; }
-  bool is_movk()                     const { return (encoding() & 0x7f800000) == 0x72800000; }
-  bool is_orr_imm()                  const { return (encoding() & 0x7f800000) == 0x32000000; }
-  bool is_cmp_rr()                   const { return (encoding() & 0x7fe00000) == 0x6b000000; }
-  bool is_csel()                     const { return (encoding() & 0x7fe00000) == 0x1a800000; }
-  bool is_sub_shift()                const { return (encoding() & 0x7f200000) == 0x4b000000; } // sub Rd, Rn, shift (Rm, imm)
-  bool is_mov()                      const { return (encoding() & 0x7fe0ffe0) == 0x2a0003e0; } // mov Rd, Rm (orr Rd, ZR, shift (Rm, 0))
-  bool is_tst()                      const { return (encoding() & 0x7f20001f) == 0x6a00001f; } // tst Rn, shift (Rm, imm) (ands ZR, Rn, shift(Rm, imm))
-  bool is_lsr_imm()                  const { return (encoding() & 0x7f807c00) == 0x53007c00; } // lsr Rd, Rn, imm (ubfm Rd, Rn, imm, 31/63)
-
-  bool is_far_jump()                 const { return is_ldr_literal() && next_raw()->is_br(); }
-  bool is_fat_call()                 const {
-    return
-#ifdef COMPILER2
-      (is_blr() && next_raw()->is_b()) ||
-#endif
-      (is_adr_aligned_lr() && next_raw()->is_br());
-  }
-  bool is_far_call()                 const {
-    return is_ldr_literal() && next_raw()->is_fat_call();
-  }
-
-  bool is_ic_near_call()             const { return is_adr_aligned_lr() && next_raw()->is_b(); }
-  bool is_ic_far_call()              const { return is_adr_aligned_lr() && next_raw()->is_ldr_literal() && next_raw()->next_raw()->is_br(); }
-  bool is_ic_call()                  const { return is_ic_near_call() || is_ic_far_call(); }
-
-  bool is_jump()                     const { return is_b() || is_far_jump(); }
-  bool is_call()                     const { return is_bl() || is_far_call() || is_ic_call(); }
-  bool is_branch()                   const { return is_b() || is_bl(); }
-
-  // c2 doesn't use fixed registers for safepoint poll address
-  bool is_safepoint_poll() const {
-    return true;
-  }
-
-  bool is_save_all_registers(const RawNativeInstruction** next) const {
-    const RawNativeInstruction* current = this;
-
-    if (!current->is_stp_preindex()) return false; current = current->next_raw();
-    for (int i = 28; i >= 0; i -= 2) {
-      if (!current->is_stp_preindex()) return false; current = current->next_raw();
-    }
-
-    if (!current->is_adr_aligned())                 return false; current = current->next_raw();
-    if (!current->is_ldr_str_gp_reg_unsigned_imm()) return false; current = current->next_raw();
-    if (!current->is_ldr_str_gp_reg_unsigned_imm()) return false; current = current->next_raw();
-
-    *next = (RawNativeInstruction*) current;
-    return true;
-  }
-
-  bool is_restore_all_registers(const RawNativeInstruction** next) const {
-    const RawNativeInstruction* current = this;
-
-    for (int i = 0; i <= 28; i += 2) {
-      if (!current->is_ldp_postindex()) return false; current = current->next_raw();
-    }
-    if (!current->is_ldp_postindex()) return false; current = current->next_raw();
-
-    *next = (RawNativeInstruction*) current;
-    return true;
-  }
-
-  const RawNativeInstruction* skip_bind_literal() const {
-    const RawNativeInstruction* current = this;
-    if (((uintptr_t)current) % wordSize != 0) {
-      assert(current->is_nop(), "should be");
-      current = current->next_raw();
-    }
-    assert(((uintptr_t)current) % wordSize == 0, "should be"); // bound literal should be aligned
-    current = current->next_raw()->next_raw();
-    return current;
-  }
-
-  bool is_stop(const RawNativeInstruction** next) const {
-    const RawNativeInstruction* current = this;
-
-    if (!current->is_save_all_registers(&current)) return false;
-    if (!current->is_ldr_literal())                return false; current = current->next_raw();
-    if (!current->is_mov_sp())                     return false; current = current->next_raw();
-    if (!current->is_ldr_literal())                return false; current = current->next_raw();
-    if (!current->is_br())                         return false; current = current->next_raw();
-
-    current = current->skip_bind_literal();
-    current = current->skip_bind_literal();
-
-    *next = (RawNativeInstruction*) current;
-    return true;
-  }
-
-  bool is_mov_slow(const RawNativeInstruction** next = NULL) const {
-    const RawNativeInstruction* current = this;
-
-    if (current->is_orr_imm()) {
-      current = current->next_raw();
-
-    } else if (current->is_movn() || current->is_movz()) {
-      current = current->next_raw();
-      int movkCount = 0;
-      while (current->is_movk()) {
-        movkCount++;
-        if (movkCount > 3) return false;
-        current = current->next_raw();
-      }
-
-    } else {
-      return false;
-    }
-
-    if (next != NULL) {
-      *next = (RawNativeInstruction*)current;
-    }
-    return true;
-  }
-
-#ifdef ASSERT
-  void skip_verify_heapbase(const RawNativeInstruction** next) const {
-    const RawNativeInstruction* current = this;
-
-    if (CheckCompressedOops) {
-      if (!current->is_ldr_str_gp_reg_unsigned_imm()) return; current = current->next_raw();
-      if (!current->is_stp_preindex())      return; current = current->next_raw();
-      // NOTE: temporary workaround, remove with m6-01?
-      // skip saving condition flags
-      current = current->next_raw();
-      current = current->next_raw();
-
-      if (!current->is_mov_slow(&current))  return;
-      if (!current->is_cmp_rr())            return; current = current->next_raw();
-      if (!current->is_b_cond())            return; current = current->next_raw();
-      if (!current->is_stop(&current))      return;
-
-#ifdef COMPILER2
-      if (current->is_nop()) current = current->next_raw();
-#endif
-      // NOTE: temporary workaround, remove with m6-01?
-      // skip restoring condition flags
-      current = current->next_raw();
-      current = current->next_raw();
-
-      if (!current->is_ldp_postindex())     return; current = current->next_raw();
-      if (!current->is_ldr_str_gp_reg_unsigned_imm()) return; current = current->next_raw();
-    }
-
-    *next = (RawNativeInstruction*) current;
-  }
-#endif // ASSERT
-
-  bool is_ldr_global_ptr(const RawNativeInstruction** next) const {
-    const RawNativeInstruction* current = this;
-
-    if (!current->is_mov_slow(&current))            return false;
-    if (!current->is_ldr_str_gp_reg_unsigned_imm()) return false; current = current->next_raw();
-
-    *next = (RawNativeInstruction*) current;
-    return true;
-  }
-
-  void skip_verify_oop(const RawNativeInstruction** next) const {
-    const RawNativeInstruction* current = this;
-
-    if (VerifyOops) {
-      if (!current->is_save_all_registers(&current)) return;
-
-      if (current->is_mov()) {
-        current = current->next_raw();
-      }
-
-      if (!current->is_mov_sp())                        return; current = current->next_raw();
-      if (!current->is_ldr_literal())                   return; current = current->next_raw();
-      if (!current->is_ldr_global_ptr(&current))        return;
-      if (!current->is_blr())                           return; current = current->next_raw();
-      if (!current->is_restore_all_registers(&current)) return;
-      if (!current->is_b())                             return; current = current->next_raw();
-
-      current = current->skip_bind_literal();
-    }
-
-    *next = (RawNativeInstruction*) current;
-  }
-
-  void skip_encode_heap_oop(const RawNativeInstruction** next) const {
-    const RawNativeInstruction* current = this;
-
-    assert (Universe::heap() != NULL, "java heap should be initialized");
-#ifdef ASSERT
-    current->skip_verify_heapbase(&current);
-#endif // ASSERT
-    current->skip_verify_oop(&current);
-
-    if (Universe::narrow_oop_base() == NULL) {
-      if (Universe::narrow_oop_shift() != 0) {
-        if (!current->is_lsr_imm()) return; current = current->next_raw();
-      } else {
-        if (current->is_mov()) {
-          current = current->next_raw();
-        }
-      }
-    } else {
-      if (!current->is_tst())       return; current = current->next_raw();
-      if (!current->is_csel())      return; current = current->next_raw();
-      if (!current->is_sub_shift()) return; current = current->next_raw();
-      if (Universe::narrow_oop_shift() != 0) {
-        if (!current->is_lsr_imm())  return; current = current->next_raw();
-      }
-    }
-
-    *next = (RawNativeInstruction*) current;
-  }
-
-  void verify();
-
-  // For unit tests
-  static void test() {}
-
- private:
-
-  void check_bits_range(int bits, int scale, int low_bit) const {
-    assert((0 <= low_bit) && (0 < bits) && (low_bit + bits <= instruction_size_in_bits), "invalid bits range");
-    assert((0 <= scale) && (scale <= 4), "scale is out of range");
-  }
-
-  void set_imm(int imm_encoding, int bits, int low_bit) {
-    int imm_mask = right_n_bits(bits) << low_bit;
-    assert((imm_encoding & ~imm_mask) == 0, "invalid imm encoding");
-    set_encoding((encoding() & ~imm_mask) | imm_encoding);
-  }
-
- protected:
-
-  // Returns signed immediate from [low_bit .. low_bit + bits - 1] bits of this instruction, scaled by given scale.
-  int get_signed_imm(int bits, int scale, int low_bit) const {
-    check_bits_range(bits, scale, low_bit);
-    int high_bits_to_clean = (instruction_size_in_bits - (low_bit + bits));
-    return encoding() << high_bits_to_clean >> (high_bits_to_clean + low_bit) << scale;
-  }
-
-  // Puts given signed immediate into the [low_bit .. low_bit + bits - 1] bits of this instruction.
-  void set_signed_imm(int value, int bits, int scale, int low_bit) {
-    set_imm(Assembler::encode_imm(value, bits, scale, low_bit), bits, low_bit);
-  }
-
-  // Returns unsigned immediate from [low_bit .. low_bit + bits - 1] bits of this instruction, scaled by given scale.
-  int get_unsigned_imm(int bits, int scale, int low_bit) const {
-    check_bits_range(bits, scale, low_bit);
-    return ((encoding() >> low_bit) & right_n_bits(bits)) << scale;
-  }
-
-  // Puts given unsigned immediate into the [low_bit .. low_bit + bits - 1] bits of this instruction.
-  void set_unsigned_imm(int value, int bits, int scale, int low_bit) {
-    set_imm(Assembler::encode_unsigned_imm(value, bits, scale, low_bit), bits, low_bit);
-  }
-
-  int get_signed_offset(int bits, int low_bit) const {
-    return get_signed_imm(bits, 2, low_bit);
-  }
-
-  void set_signed_offset(int offset, int bits, int low_bit) {
-    set_signed_imm(offset, bits, 2, low_bit);
-  }
-};
-
-inline RawNativeInstruction* rawNativeInstruction_at(address address) {
-  RawNativeInstruction* instr = RawNativeInstruction::at(address);
-#ifdef ASSERT
-  instr->verify();
-#endif // ASSERT
-  return instr;
-}
-
-// -------------------------------------------------------------------
-
-// Load/store register (unsigned scaled immediate)
-class NativeMovRegMem: public RawNativeInstruction {
- private:
-  int get_offset_scale() const {
-    return get_unsigned_imm(2, 0, 30);
-  }
-
- public:
-  int offset() const {
-    return get_unsigned_imm(12, get_offset_scale(), 10);
-  }
-
-  void set_offset(int x);
-
-  void add_offset_in_bytes(int add_offset) {
-    set_offset(offset() + add_offset);
-  }
-};
-
-inline NativeMovRegMem* nativeMovRegMem_at(address address) {
-  const RawNativeInstruction* instr = rawNativeInstruction_at(address);
-
-#ifdef COMPILER1
-    // NOP required for C1 patching
-    if (instr->is_nop()) {
-      instr = instr->next_raw();
-    }
-#endif
-
-  instr->skip_encode_heap_oop(&instr);
-
-  assert(instr->is_ldr_str_reg_unsigned_imm(), "must be");
-  return (NativeMovRegMem*)instr;
-}
-
-// -------------------------------------------------------------------
-
-class NativeInstruction : public RawNativeInstruction {
-public:
-  static NativeInstruction* at(address address) {
-    return (NativeInstruction*)address;
-  }
-
-public:
-  // No need to consider indirections while parsing NativeInstruction
-  address next_instruction_address() const {
-    return next_raw_instruction_address();
-  }
-
-  // next() is no longer defined to avoid confusion.
-  //
-  // The front end and most classes except for those defined in nativeInst_arm
-  // or relocInfo_arm should only use next_instruction_address(), skipping
-  // over composed instruction and ignoring back-end extensions.
-  //
-  // The back-end can use next_raw() when it knows the instruction sequence
-  // and only wants to skip a single native instruction.
-};
-
-inline NativeInstruction* nativeInstruction_at(address address) {
-  NativeInstruction* instr = NativeInstruction::at(address);
-#ifdef ASSERT
-  instr->verify();
-#endif // ASSERT
-  return instr;
-}
-
-// -------------------------------------------------------------------
-class NativeInstructionLdrLiteral: public NativeInstruction {
- public:
-  address literal_address() {
-    address la = instruction_address() + get_signed_offset(19, 5);
-    assert(la != instruction_address(), "literal points to instruction");
-    return la;
-  }
-
-  address after_literal_address() {
-    return literal_address() + wordSize;
-  }
-
-  void set_literal_address(address addr, address pc) {
-    assert(is_ldr_literal(), "must be");
-    int opc = (encoding() >> 30) & 0x3;
-    assert (opc != 0b01 || addr == pc || ((uintx)addr & 7) == 0, "ldr target should be aligned");
-    set_signed_offset(addr - pc, 19, 5);
-  }
-
-  void set_literal_address(address addr) {
-    set_literal_address(addr, instruction_address());
-  }
-
-  address literal_value() {
-    return *(address*)literal_address();
-  }
-
-  void set_literal_value(address dest) {
-    *(address*)literal_address() = dest;
-  }
-};
-
-inline NativeInstructionLdrLiteral* nativeLdrLiteral_at(address address) {
-  assert(nativeInstruction_at(address)->is_ldr_literal(), "must be");
-  return (NativeInstructionLdrLiteral*)address;
-}
-
-// -------------------------------------------------------------------
-// Common class for branch instructions with 26-bit immediate offset: B (unconditional) and BL
-class NativeInstructionBranchImm26: public NativeInstruction {
- public:
-  address destination(int adj = 0) const {
-    return instruction_address() + get_signed_offset(26, 0) + adj;
-  }
-
-  void set_destination(address dest) {
-    intptr_t offset = (intptr_t)(dest - instruction_address());
-    assert((offset & 0x3) == 0, "should be aligned");
-    set_signed_offset(offset, 26, 0);
-  }
-};
-
-inline NativeInstructionBranchImm26* nativeB_at(address address) {
-  assert(nativeInstruction_at(address)->is_b(), "must be");
-  return (NativeInstructionBranchImm26*)address;
-}
-
-inline NativeInstructionBranchImm26* nativeBL_at(address address) {
-  assert(nativeInstruction_at(address)->is_bl(), "must be");
-  return (NativeInstructionBranchImm26*)address;
-}
-
-// -------------------------------------------------------------------
-class NativeInstructionAdrLR: public NativeInstruction {
- public:
-  // Returns address which is loaded into LR by this instruction.
-  address target_lr_value() {
-    return instruction_address() + get_signed_offset(19, 5);
-  }
-};
-
-inline NativeInstructionAdrLR* nativeAdrLR_at(address address) {
-  assert(nativeInstruction_at(address)->is_adr_aligned_lr(), "must be");
-  return (NativeInstructionAdrLR*)address;
-}
-
-// -------------------------------------------------------------------
-class RawNativeCall: public NativeInstruction {
- public:
-
-  address return_address() const {
-    if (is_bl()) {
-      return next_raw_instruction_address();
-
-    } else if (is_far_call()) {
-#ifdef COMPILER2
-      if (next_raw()->is_blr()) {
-        // ldr_literal; blr; ret_addr: b skip_literal;
-        return addr_at(2 * instruction_size);
-      }
-#endif
-      assert(next_raw()->is_adr_aligned_lr() && next_raw()->next_raw()->is_br(), "must be");
-      return nativeLdrLiteral_at(instruction_address())->after_literal_address();
-
-    } else if (is_ic_call()) {
-      return nativeAdrLR_at(instruction_address())->target_lr_value();
-
-    } else {
-      ShouldNotReachHere();
-      return NULL;
-    }
-  }
-
-  address destination(int adj = 0) const {
-    if (is_bl()) {
-      return nativeBL_at(instruction_address())->destination(adj);
-
-    } else if (is_far_call()) {
-      return nativeLdrLiteral_at(instruction_address())->literal_value();
-
-    } else if (is_adr_aligned_lr()) {
-      RawNativeInstruction *next = next_raw();
-      if (next->is_b()) {
-        // ic_near_call
-        return nativeB_at(next->instruction_address())->destination(adj);
-      } else if (next->is_far_jump()) {
-        // ic_far_call
-        return nativeLdrLiteral_at(next->instruction_address())->literal_value();
-      }
-    }
-    ShouldNotReachHere();
-    return NULL;
-  }
-
-  void set_destination(address dest) {
-    if (is_bl()) {
-      nativeBL_at(instruction_address())->set_destination(dest);
-      return;
-    }
-    if (is_far_call()) {
-      nativeLdrLiteral_at(instruction_address())->set_literal_value(dest);
-      OrderAccess::storeload(); // overkill if caller holds lock?
-      return;
-    }
-    if (is_adr_aligned_lr()) {
-      RawNativeInstruction *next = next_raw();
-      if (next->is_b()) {
-        // ic_near_call
-        nativeB_at(next->instruction_address())->set_destination(dest);
-        return;
-      }
-      if (next->is_far_jump()) {
-        // ic_far_call
-        nativeLdrLiteral_at(next->instruction_address())->set_literal_value(dest);
-        OrderAccess::storeload(); // overkill if caller holds lock?
-        return;
-      }
-    }
-    ShouldNotReachHere();
-  }
-
-  void set_destination_mt_safe(address dest) {
-    assert(CodeCache::contains(dest), "call target should be from code cache (required by ic_call and patchable_call)");
-    set_destination(dest);
-  }
-
-  void verify() {
-    assert(RawNativeInstruction::is_call(), "should be");
-  }
-
-  void verify_alignment() {
-    // Nothing to do on ARM
-  }
-};
-
-inline RawNativeCall* rawNativeCall_at(address address) {
-  RawNativeCall * call = (RawNativeCall*)address;
-  call->verify();
-  return call;
-}
-
-class NativeCall: public RawNativeCall {
- public:
-
-  // NativeCall::next_instruction_address() is used only to define the
-  // range where to look for the relocation information. We need not
-  // walk over composed instructions (as long as the relocation information
-  // is associated to the first instruction).
-  address next_instruction_address() const {
-    return next_raw_instruction_address();
-  }
-
-  static bool is_call_before(address return_address);
-};
-
-inline NativeCall* nativeCall_at(address address) {
-  NativeCall * call = (NativeCall*)address;
-  call->verify();
-  return call;
-}
-
-NativeCall* nativeCall_before(address return_address);
-
-// -------------------------------------------------------------------
-class NativeGeneralJump: public NativeInstruction {
- public:
-
-  address jump_destination() const {
-    return nativeB_at(instruction_address())->destination();
-  }
-
-  static void replace_mt_safe(address instr_addr, address code_buffer);
-
-  static void insert_unconditional(address code_pos, address entry);
-
-};
-
-inline NativeGeneralJump* nativeGeneralJump_at(address address) {
-  assert(nativeInstruction_at(address)->is_b(), "must be");
-  return (NativeGeneralJump*)address;
-}
-
-// -------------------------------------------------------------------
-class RawNativeJump: public NativeInstruction {
- public:
-
-  address jump_destination(int adj = 0) const {
-    if (is_b()) {
-      address a = nativeB_at(instruction_address())->destination(adj);
-      // Jump destination -1 is encoded as a jump to self
-      if (a == instruction_address()) {
-        return (address)-1;
-      }
-      return a;
-    } else {
-      assert(is_far_jump(), "should be");
-      return nativeLdrLiteral_at(instruction_address())->literal_value();
-    }
-  }
-
-  void set_jump_destination(address dest) {
-    if (is_b()) {
-      // Jump destination -1 is encoded as a jump to self
-      if (dest == (address)-1) {
-        dest = instruction_address();
-      }
-      nativeB_at(instruction_address())->set_destination(dest);
-    } else {
-      assert(is_far_jump(), "should be");
-      nativeLdrLiteral_at(instruction_address())->set_literal_value(dest);
-    }
-  }
-};
-
-inline RawNativeJump* rawNativeJump_at(address address) {
-  assert(rawNativeInstruction_at(address)->is_jump(), "must be");
-  return (RawNativeJump*)address;
-}
-
-// -------------------------------------------------------------------
-class NativeMovConstReg: public NativeInstruction {
-
-  NativeMovConstReg *adjust() const {
-    return (NativeMovConstReg *)adjust(this);
-  }
-
- public:
-
-  static RawNativeInstruction *adjust(const RawNativeInstruction *ni) {
-#ifdef COMPILER1
-    // NOP required for C1 patching
-    if (ni->is_nop()) {
-      return ni->next_raw();
-    }
-#endif
-    return (RawNativeInstruction *)ni;
-  }
-
-  intptr_t _data() const;
-  void set_data(intptr_t x);
-
-  intptr_t data() const {
-    return adjust()->_data();
-  }
-
-  bool is_pc_relative() {
-    return adjust()->is_ldr_literal();
-  }
-
-  void _set_pc_relative_offset(address addr, address pc) {
-    assert(is_ldr_literal(), "must be");
-    nativeLdrLiteral_at(instruction_address())->set_literal_address(addr, pc);
-  }
-
-  void set_pc_relative_offset(address addr, address pc) {
-    NativeMovConstReg *ni = adjust();
-    int dest_adj = ni->instruction_address() - instruction_address();
-    ni->_set_pc_relative_offset(addr, pc + dest_adj);
-  }
-
-  address _next_instruction_address() const {
-#ifdef COMPILER2
-    if (is_movz()) {
-      // narrow constant
-      RawNativeInstruction* ni = next_raw();
-      assert(ni->is_movk(), "movz;movk expected");
-      return ni->next_raw_instruction_address();
-    }
-#endif
-    assert(is_ldr_literal(), "must be");
-    return NativeInstruction::next_raw_instruction_address();
-  }
-
-  address next_instruction_address() const {
-    return adjust()->_next_instruction_address();
-  }
-};
-
-inline NativeMovConstReg* nativeMovConstReg_at(address address) {
-  RawNativeInstruction* ni = rawNativeInstruction_at(address);
-
-  ni = NativeMovConstReg::adjust(ni);
-
-  assert(ni->is_mov_slow() || ni->is_ldr_literal(), "must be");
-  return (NativeMovConstReg*)address;
-}
-
-// -------------------------------------------------------------------
-class NativeJump: public RawNativeJump {
- public:
-
-  static void check_verified_entry_alignment(address entry, address verified_entry);
-
-  static void patch_verified_entry(address entry, address verified_entry, address dest);
-};
-
-inline NativeJump* nativeJump_at(address address) {
-  assert(nativeInstruction_at(address)->is_jump(), "must be");
-  return (NativeJump*)address;
-}
-
-#endif // CPU_ARM_VM_NATIVEINST_ARM_64_HPP
diff --git a/src/hotspot/cpu/arm/register_arm.cpp b/src/hotspot/cpu/arm/register_arm.cpp
index 9e75b68b764..a0ae9ff4f92 100644
--- a/src/hotspot/cpu/arm/register_arm.cpp
+++ b/src/hotspot/cpu/arm/register_arm.cpp
@@ -32,12 +32,6 @@ const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::num_fpr +
 
 const char* RegisterImpl::name() const {
   const char* names[number_of_registers] = {
-#ifdef AARCH64
-    "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
-    "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
-    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
-    "x24", "x25", "x26", "x27", "x28", "fp",  "lr",  "xzr", "sp"
-#else
     "r0", "r1", "r2", "r3", "r4", "r5", "r6",
 #if (FP_REG_NUM == 7)
     "fp",
@@ -51,19 +45,12 @@ const char* RegisterImpl::name() const {
     "r11",
 #endif
     "r12", "sp", "lr", "pc"
-#endif // AARCH64
   };
   return is_valid() ? names[encoding()] : "noreg";
 }
 
 const char* FloatRegisterImpl::name() const {
   const char* names[number_of_registers] = {
-#ifdef AARCH64
-    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
-    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
-    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
-    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
-#else
      "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
      "s8",  "s9", "s10", "s11", "s12", "s13", "s14", "s15",
     "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
@@ -74,7 +61,6 @@ const char* FloatRegisterImpl::name() const {
     "s48", "s49?","s50", "s51?","s52", "s53?","s54", "s55?",
     "s56", "s57?","s58", "s59?","s60", "s61?","s62", "s63?"
 #endif
-#endif // AARCH64
   };
   return is_valid() ? names[encoding()] : "fnoreg";
 }
diff --git a/src/hotspot/cpu/arm/register_arm.hpp b/src/hotspot/cpu/arm/register_arm.hpp
index bdaa1386c16..b6110a545c6 100644
--- a/src/hotspot/cpu/arm/register_arm.hpp
+++ b/src/hotspot/cpu/arm/register_arm.hpp
@@ -66,7 +66,6 @@ typedef VMRegImpl* VMReg;
 #define R9_IS_SCRATCHED 0
 #endif
 
-#ifndef AARCH64
 // FP_REG_NUM
 //
 // The ARM ABI does not state which register is used for the frame pointer.
@@ -77,7 +76,6 @@ typedef VMRegImpl* VMReg;
 // Default: FP is R11
 #define FP_REG_NUM 11
 #endif
-#endif // AARCH64
 
 // ALIGN_WIDE_ARGUMENTS
 //
@@ -113,32 +111,6 @@ typedef VMRegImpl* VMReg;
 #define R14    ((Register)14)
 #define R15    ((Register)15)
 
-#ifdef AARCH64
-
-#define R16    ((Register)16)
-#define R17    ((Register)17)
-#define R18    ((Register)18)
-#define R19    ((Register)19)
-#define R20    ((Register)20)
-#define R21    ((Register)21)
-#define R22    ((Register)22)
-#define R23    ((Register)23)
-#define R24    ((Register)24)
-#define R25    ((Register)25)
-#define R26    ((Register)26)
-#define R27    ((Register)27)
-#define R28    ((Register)28)
-#define R29    ((Register)29)
-#define R30    ((Register)30)
-#define ZR     ((Register)31)
-#define SP     ((Register)32)
-
-#define FP     R29
-#define LR     R30
-
-#define altFP_7_11 R7
-
-#else // !AARCH64
 
 #define FP     ((Register)FP_REG_NUM)
 
@@ -158,7 +130,6 @@ typedef VMRegImpl* VMReg;
 #define LR     R14
 #define PC     R15
 
-#endif // !AARCH64
 
 
 class RegisterImpl;
@@ -171,11 +142,7 @@ inline Register as_Register(int encoding) {
 class RegisterImpl : public AbstractRegisterImpl {
  public:
   enum {
-#ifdef AARCH64
-    number_of_gprs = 31,
-    zr_sp_encoding = 31,
-#endif
-    number_of_registers = AARCH64_ONLY(number_of_gprs + 2) NOT_AARCH64(16)
+    number_of_registers = 16
   };
 
   Register successor() const      { return as_Register(encoding() + 1); }
@@ -188,19 +155,10 @@ class RegisterImpl : public AbstractRegisterImpl {
   int   encoding() const          { assert(is_valid(), "invalid register"); return value(); }
   const char* name() const;
 
-#ifdef AARCH64
-  int encoding_with_zr() const   { assert (is_valid_gpr_or_zr(), "invalid register"); return (this == ZR) ? zr_sp_encoding : value(); }
-  int encoding_with_sp() const   { assert (is_valid_gpr_or_sp(), "invalid register"); return (this == SP) ? zr_sp_encoding : value(); }
-#endif
 
   // testers
   bool is_valid() const           { return 0 <= value() && value() < number_of_registers; }
 
-#ifdef AARCH64
-  bool is_valid_gpr()       const  { return (0 <= value() && value() < number_of_gprs); }
-  bool is_valid_gpr_or_zr() const  { return is_valid_gpr() || (this == ZR); }
-  bool is_valid_gpr_or_sp() const  { return is_valid_gpr() || (this == SP); }
-#endif
 };
 
 CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
@@ -217,11 +175,7 @@ inline FloatRegister as_FloatRegister(int encoding) {
 class FloatRegisterImpl : public AbstractRegisterImpl {
  public:
   enum {
-#ifdef AARCH64
-    number_of_registers = 32
-#else
     number_of_registers = NOT_COMPILER2(32) COMPILER2_PRESENT(64)
-#endif
   };
 
   inline friend FloatRegister as_FloatRegister(int encoding);
@@ -234,7 +188,6 @@ class FloatRegisterImpl : public AbstractRegisterImpl {
 
   const char* name() const;
 
-#ifndef AARCH64
   int hi_bits() const {
     return (encoding() >> 1) & 0xf;
   }
@@ -246,54 +199,10 @@ class FloatRegisterImpl : public AbstractRegisterImpl {
   int hi_bit() const {
     return encoding() >> 5;
   }
-#endif // !AARCH64
 };
 
 CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1));
 
-#ifdef AARCH64
-
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V0,     ( 0));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V1,     ( 1));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V2,     ( 2));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V3,     ( 3));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V4,     ( 4));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V5,     ( 5));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V6,     ( 6));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V7,     ( 7));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V8,     ( 8));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V9,     ( 9));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V10,    (10));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V11,    (11));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V12,    (12));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V13,    (13));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V14,    (14));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V15,    (15));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V16,    (16));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V17,    (17));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V18,    (18));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V19,    (19));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V20,    (20));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V21,    (21));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V22,    (22));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V23,    (23));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V24,    (24));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V25,    (25));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V26,    (26));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V27,    (27));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V28,    (28));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V29,    (29));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V30,    (30));
-CONSTANT_REGISTER_DECLARATION(FloatRegister, V31,    (31));
-
-#define S0       V0
-#define S1_reg   V1
-#define Stemp    V31
-
-#define D0       V0
-#define D1       V1
-
-#else // AARCH64
 
 /*
  * S1-S6 are named with "_reg" suffix to avoid conflict with
@@ -366,16 +275,15 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, D29,    (58));
 CONSTANT_REGISTER_DECLARATION(FloatRegister, D30,    (60));
 CONSTANT_REGISTER_DECLARATION(FloatRegister, D31,    (62));
 
-#endif // AARCH64
 
 class ConcreteRegisterImpl : public AbstractRegisterImpl {
  public:
   enum {
     log_vmregs_per_word = LogBytesPerWord - LogBytesPerInt, // VMRegs are of 4-byte size
 #ifdef COMPILER2
-    log_bytes_per_fpr  = AARCH64_ONLY(4) NOT_AARCH64(2), // quad vectors
+    log_bytes_per_fpr  = 2, // 4 bytes per FPR slot
 #else
-    log_bytes_per_fpr  = AARCH64_ONLY(3) NOT_AARCH64(2), // double vectors
+    log_bytes_per_fpr  = 2, // 4 bytes per FPR slot
 #endif
     log_words_per_fpr  = log_bytes_per_fpr - LogBytesPerWord,
     words_per_fpr      = 1 << log_words_per_fpr,
@@ -388,17 +296,13 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
     max_gpr0 = num_gpr,
     num_fpr  = FloatRegisterImpl::number_of_registers << log_vmregs_per_fpr,
     max_fpr0 = max_gpr0 + num_fpr,
-    number_of_registers = num_gpr + num_fpr +
-                          // TODO-AARCH64 revise
-                          1+1 // APSR and FPSCR so that c2's REG_COUNT <= ConcreteRegisterImpl::number_of_registers
+    number_of_registers = num_gpr + num_fpr + 1+1 // APSR and FPSCR so that c2's REG_COUNT <= ConcreteRegisterImpl::number_of_registers
   };
 
   static const int max_gpr;
   static const int max_fpr;
 };
 
-// TODO-AARCH64 revise the following definitions
-
 class VFPSystemRegisterImpl;
 typedef VFPSystemRegisterImpl* VFPSystemRegister;
 class VFPSystemRegisterImpl : public AbstractRegisterImpl {
@@ -414,33 +318,21 @@ class VFPSystemRegisterImpl : public AbstractRegisterImpl {
 /*
  * Register definitions shared across interpreter and compiler
  */
-#define Rexception_obj   AARCH64_ONLY(R19) NOT_AARCH64(R4)
-#define Rexception_pc    AARCH64_ONLY(R20) NOT_AARCH64(R5)
-
-#ifdef AARCH64
-#define Rheap_base       R27
-#endif // AARCH64
+#define Rexception_obj   R4
+#define Rexception_pc    R5
 
 /*
  * Interpreter register definitions common to C++ and template interpreters.
  */
-#ifdef AARCH64
-#define Rlocals          R23
-#define Rmethod          R26
-#define Rthread          R28
-#define Rtemp            R16
-#define Rtemp2           R17
-#else
 #define Rlocals          R8
 #define Rmethod          R9
 #define Rthread          R10
 #define Rtemp            R12
-#endif // AARCH64
 
 // Interpreter calling conventions
 
-#define Rparams          AARCH64_ONLY(R8)  NOT_AARCH64(SP)
-#define Rsender_sp       AARCH64_ONLY(R19) NOT_AARCH64(R4)
+#define Rparams          SP
+#define Rsender_sp       R4
 
 // JSR292
 //  Note: R5_mh is needed only during the call setup, including adapters
@@ -479,25 +371,23 @@ class VFPSystemRegisterImpl : public AbstractRegisterImpl {
 #define D1_tmp                 D1
 
 // Temporary registers saved across VM calls (according to C calling conventions)
-#define Rtmp_save0             AARCH64_ONLY(R19) NOT_AARCH64(R4)
-#define Rtmp_save1             AARCH64_ONLY(R20) NOT_AARCH64(R5)
+#define Rtmp_save0             R4
+#define Rtmp_save1             R5
 
 // Cached TOS value
 #define R0_tos                 R0
 
-#ifndef AARCH64
 #define R0_tos_lo              R0
 #define R1_tos_hi              R1
-#endif
 
 #define S0_tos                 S0
 #define D0_tos                 D0
 
 // Dispatch table
-#define RdispatchTable         AARCH64_ONLY(R22) NOT_AARCH64(R6)
+#define RdispatchTable         R6
 
 // Bytecode pointer
-#define Rbcp                   AARCH64_ONLY(R24) NOT_AARCH64(altFP_7_11)
+#define Rbcp                   altFP_7_11
 
 // Pre-loaded next bytecode for the dispatch
 #define R3_bytecode            R3
@@ -507,7 +397,7 @@ class VFPSystemRegisterImpl : public AbstractRegisterImpl {
 #define R4_ArrayIndexOutOfBounds_index   R4
 
 // Interpreter expression stack top
-#define Rstack_top             AARCH64_ONLY(R25) NOT_AARCH64(SP)
+#define Rstack_top             SP
 
 /*
  * Linux 32-bit ARM C ABI Register calling conventions
@@ -529,28 +419,14 @@ class VFPSystemRegisterImpl : public AbstractRegisterImpl {
  *   R13 (SP)   Stack Pointer                 callee
  *   R14 (LR)   Link register
  *   R15 (PC)   Program Counter
- *
- * TODO-AARCH64: document AArch64 ABI
- *
  */
 #define c_rarg0  R0
 #define c_rarg1  R1
 #define c_rarg2  R2
 #define c_rarg3  R3
 
-#ifdef AARCH64
-#define c_rarg4  R4
-#define c_rarg5  R5
-#define c_rarg6  R6
-#define c_rarg7  R7
-#endif
 
-#ifdef AARCH64
-#define GPR_PARAMS    8
-#define FPR_PARAMS    8
-#else
 #define GPR_PARAMS    4
-#endif
 
 
 // Java ABI
@@ -560,11 +436,5 @@ class VFPSystemRegisterImpl : public AbstractRegisterImpl {
 #define j_rarg2  c_rarg2
 #define j_rarg3  c_rarg3
 
-#ifdef AARCH64
-#define j_rarg4  c_rarg4
-#define j_rarg5  c_rarg5
-#define j_rarg6  c_rarg6
-#define j_rarg7  c_rarg7
-#endif
 
 #endif // CPU_ARM_VM_REGISTER_ARM_HPP
diff --git a/src/hotspot/cpu/arm/register_definitions_arm.cpp b/src/hotspot/cpu/arm/register_definitions_arm.cpp
index 53fa4a2123e..4aa7714970c 100644
--- a/src/hotspot/cpu/arm/register_definitions_arm.cpp
+++ b/src/hotspot/cpu/arm/register_definitions_arm.cpp
@@ -31,42 +31,6 @@
 REGISTER_DEFINITION(Register, noreg);
 REGISTER_DEFINITION(FloatRegister, fnoreg);
 
-#ifdef AARCH64
-
-REGISTER_DEFINITION(FloatRegister, V0);
-REGISTER_DEFINITION(FloatRegister, V1);
-REGISTER_DEFINITION(FloatRegister, V2);
-REGISTER_DEFINITION(FloatRegister, V3);
-REGISTER_DEFINITION(FloatRegister, V4);
-REGISTER_DEFINITION(FloatRegister, V5);
-REGISTER_DEFINITION(FloatRegister, V6);
-REGISTER_DEFINITION(FloatRegister, V7);
-REGISTER_DEFINITION(FloatRegister, V8);
-REGISTER_DEFINITION(FloatRegister, V9);
-REGISTER_DEFINITION(FloatRegister, V10);
-REGISTER_DEFINITION(FloatRegister, V11);
-REGISTER_DEFINITION(FloatRegister, V12);
-REGISTER_DEFINITION(FloatRegister, V13);
-REGISTER_DEFINITION(FloatRegister, V14);
-REGISTER_DEFINITION(FloatRegister, V15);
-REGISTER_DEFINITION(FloatRegister, V16);
-REGISTER_DEFINITION(FloatRegister, V17);
-REGISTER_DEFINITION(FloatRegister, V18);
-REGISTER_DEFINITION(FloatRegister, V19);
-REGISTER_DEFINITION(FloatRegister, V20);
-REGISTER_DEFINITION(FloatRegister, V21);
-REGISTER_DEFINITION(FloatRegister, V22);
-REGISTER_DEFINITION(FloatRegister, V23);
-REGISTER_DEFINITION(FloatRegister, V24);
-REGISTER_DEFINITION(FloatRegister, V25);
-REGISTER_DEFINITION(FloatRegister, V26);
-REGISTER_DEFINITION(FloatRegister, V27);
-REGISTER_DEFINITION(FloatRegister, V28);
-REGISTER_DEFINITION(FloatRegister, V29);
-REGISTER_DEFINITION(FloatRegister, V30);
-REGISTER_DEFINITION(FloatRegister, V31);
-
-#else // AARCH64
 
 REGISTER_DEFINITION(FloatRegister, S0);
 REGISTER_DEFINITION(FloatRegister, S1_reg);
@@ -134,4 +98,3 @@ REGISTER_DEFINITION(FloatRegister, D29);
 REGISTER_DEFINITION(FloatRegister, D30);
 REGISTER_DEFINITION(FloatRegister, D31);
 
-#endif //AARCH64
diff --git a/src/hotspot/cpu/arm/relocInfo_arm.cpp b/src/hotspot/cpu/arm/relocInfo_arm.cpp
index 6d38ab8fd3b..25a5ee3b7c5 100644
--- a/src/hotspot/cpu/arm/relocInfo_arm.cpp
+++ b/src/hotspot/cpu/arm/relocInfo_arm.cpp
@@ -35,21 +35,6 @@
 void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
 
   NativeMovConstReg* ni = nativeMovConstReg_at(addr());
-#if defined(AARCH64) && defined(COMPILER2)
-  if (ni->is_movz()) {
-    assert(type() == relocInfo::oop_type, "!");
-    if (verify_only) {
-      uintptr_t d = ni->data();
-      guarantee((d >> 32) == 0, "not narrow oop");
-      narrowOop no = d;
-      oop o = CompressedOops::decode(no);
-      guarantee(cast_from_oop<intptr_t>(o) == (intptr_t)x, "instructions must match");
-    } else {
-      ni->set_data((intptr_t)x);
-    }
-    return;
-  }
-#endif
   if (verify_only) {
     guarantee(ni->data() == (intptr_t)(x + o), "instructions must match");
   } else {
@@ -69,21 +54,16 @@ address Relocation::pd_call_destination(address orig_addr) {
 
   RawNativeInstruction* ni = rawNativeInstruction_at(pc);
 
-#if (!defined(AARCH64))
-  if (NOT_AARCH64(ni->is_add_lr()) AARCH64_ONLY(ni->is_adr_aligned_lr())) {
-    // On arm32, skip the optional 'add LR, PC, #offset'
+  if (ni->is_add_lr()) {
+    // Skip the optional 'add LR, PC, #offset'
     // (Allowing the jump support code to handle fat_call)
     pc = ni->next_raw_instruction_address();
     ni = nativeInstruction_at(pc);
   }
-#endif
 
-  if (AARCH64_ONLY(ni->is_call()) NOT_AARCH64(ni->is_bl())) {
-    // For arm32, fat_call are handled by is_jump for the new 'ni',
+  if (ni->is_bl()) {
+    // fat_call is handled by is_jump for the new 'ni',
     // requiring only to support is_bl.
-    //
-    // For AARCH64, skipping a leading adr is not sufficient
-    // to reduce calls to a simple bl.
     return rawNativeCall_at(pc)->destination(adj);
   }
 
@@ -98,21 +78,16 @@ void Relocation::pd_set_call_destination(address x) {
   address pc = addr();
   NativeInstruction* ni = nativeInstruction_at(pc);
 
-#if (!defined(AARCH64))
-  if (NOT_AARCH64(ni->is_add_lr()) AARCH64_ONLY(ni->is_adr_aligned_lr())) {
-    // On arm32, skip the optional 'add LR, PC, #offset'
+  if (ni->is_add_lr()) {
+    // Skip the optional 'add LR, PC, #offset'
     // (Allowing the jump support code to handle fat_call)
     pc = ni->next_raw_instruction_address();
     ni = nativeInstruction_at(pc);
   }
-#endif
 
-  if (AARCH64_ONLY(ni->is_call()) NOT_AARCH64(ni->is_bl())) {
-    // For arm32, fat_call are handled by is_jump for the new 'ni',
+  if (ni->is_bl()) {
+    // fat_call is handled by is_jump for the new 'ni',
     // requiring only to support is_bl.
-    //
-    // For AARCH64, skipping a leading adr is not sufficient
-    // to reduce calls to a simple bl.
     rawNativeCall_at(pc)->set_destination(x);
     return;
   }
@@ -138,15 +113,6 @@ void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffe
 
 void metadata_Relocation::pd_fix_value(address x) {
   assert(! addr_in_const(), "Do not use");
-#ifdef AARCH64
-#ifdef COMPILER2
-  NativeMovConstReg* ni = nativeMovConstReg_at(addr());
-  if (ni->is_mov_slow()) {
-    return;
-  }
-#endif
-  set_value(x);
-#else
   if (!VM_Version::supports_movw()) {
     set_value(x);
 #ifdef ASSERT
@@ -165,5 +131,4 @@ void metadata_Relocation::pd_fix_value(address x) {
     // assert(ni->data() == (int)x, "metadata relocation mismatch");
 #endif
   }
-#endif // !AARCH64
 }
diff --git a/src/hotspot/cpu/arm/runtime_arm.cpp b/src/hotspot/cpu/arm/runtime_arm.cpp
index 7918011929d..9aa8c11407d 100644
--- a/src/hotspot/cpu/arm/runtime_arm.cpp
+++ b/src/hotspot/cpu/arm/runtime_arm.cpp
@@ -126,15 +126,8 @@ void OptoRuntime::generate_exception_blob() {
 
   // Restore SP from its saved reg (FP) if the exception PC is a MethodHandle call site.
   __ ldr(Rtemp, Address(Rthread, JavaThread::is_method_handle_return_offset()));
-#ifdef AARCH64
-  Label skip;
-  __ cbz(Rtemp, skip);
-  __ mov(SP, Rmh_SP_save);
-  __ bind(skip);
-#else
   __ cmp(Rtemp, 0);
   __ mov(SP, Rmh_SP_save, ne);
-#endif
 
   // R0 contains handler address
   // Since this may be the deopt blob we must set R5 to look like we returned
diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
index 25366a8af54..e175c0cf2b2 100644
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
@@ -62,46 +62,6 @@ class RegisterSaver {
   // branch to the runtime. The slot at R14/R30_offset is for the value of LR
   // in case it's live in the method we are coming from.
 
-#ifdef AARCH64
-
-  //
-  // On AArch64 registers save area has the following layout:
-  //
-  // |---------------------|
-  // | return address (LR) |
-  // | FP                  |
-  // |---------------------|
-  // | V31                 |
-  // | ...                 |
-  // | V0                  |
-  // |---------------------|
-  // | padding             |
-  // | R30 (LR live value) |
-  // |---------------------|
-  // | R27                 |
-  // | ...                 |
-  // | R0                  |
-  // |---------------------| <-- SP
-  //
-
-  enum RegisterLayout {
-    number_of_saved_gprs = 28,
-    number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
-    words_per_fpr = ConcreteRegisterImpl::words_per_fpr,
-
-    R0_offset  = 0,
-    R30_offset = R0_offset + number_of_saved_gprs,
-    D0_offset  = R30_offset + 2,
-    FP_offset  = D0_offset + number_of_saved_fprs * words_per_fpr,
-    LR_offset  = FP_offset + 1,
-
-    reg_save_size = LR_offset + 1,
-  };
-
-  static const int Rmethod_offset;
-  static const int Rtemp_offset;
-
-#else
 
   enum RegisterLayout {
     fpu_save_size = FloatRegisterImpl::number_of_registers,
@@ -139,7 +99,6 @@ class RegisterSaver {
   // (altFP_7_11 is the one amoung R7 and R11 which is not FP)
 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
 
-#endif // AARCH64
 
   //  When LR may be live in the nmethod from which we are comming
   //  then lr_saved is true, the return address is saved before the
@@ -154,10 +113,6 @@ class RegisterSaver {
 };
 
 
-#ifdef AARCH64
-const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding();
-const int RegisterSaver::Rtemp_offset   = RegisterSaver::R0_offset + Rtemp->encoding();
-#endif // AARCH64
 
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
@@ -168,47 +123,6 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
   OopMapSet *oop_maps = new OopMapSet();
   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 
-#ifdef AARCH64
-  assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
-
-  if (lr_saved) {
-    // LR was stashed here, so that jump could use it as a scratch reg
-    __ ldr(LR, Address(SP, 0));
-    // There are two words on the stack top:
-    //  [SP + 0]: placeholder for FP
-    //  [SP + wordSize]: saved return address
-    __ str(FP, Address(SP, 0));
-  } else {
-    __ raw_push(FP, LR);
-  }
-
-  __ sub(SP, SP, (reg_save_size - 2) * wordSize);
-
-  for (int i = 0; i < number_of_saved_gprs; i += 2) {
-    int offset = R0_offset + i;
-    __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize));
-    map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
-    map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg());
-  }
-
-  __ str(R30, Address(SP, R30_offset * wordSize));
-  map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg());
-
-  for (int i = 0; i < number_of_saved_fprs; i += 2) {
-    int offset1 = D0_offset + i * words_per_fpr;
-    int offset2 = offset1 + words_per_fpr;
-    Address base(SP, offset1 * wordSize);
-    if (words_per_fpr == 2) {
-      // pair of "wide" quad vector registers
-      __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
-    } else {
-      // pair of double vector registers
-      __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
-    }
-    map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
-    map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg());
-  }
-#else
   if (lr_saved) {
     __ push(RegisterSet(FP));
   } else {
@@ -252,38 +166,11 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
     }
   }
-#endif // AARCH64
 
   return map;
 }
 
 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
-#ifdef AARCH64
-  for (int i = 0; i < number_of_saved_gprs; i += 2) {
-    __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
-  }
-
-  __ ldr(R30, Address(SP, R30_offset * wordSize));
-
-  for (int i = 0; i < number_of_saved_fprs; i += 2) {
-    Address base(SP, (D0_offset + i * words_per_fpr) * wordSize);
-    if (words_per_fpr == 2) {
-      // pair of "wide" quad vector registers
-      __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
-    } else {
-      // pair of double vector registers
-      __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
-    }
-  }
-
-  __ add(SP, SP, (reg_save_size - 2) * wordSize);
-
-  if (restore_lr) {
-    __ raw_pop(FP, LR);
-  } else {
-    __ ldr(FP, Address(SP, 0));
-  }
-#else
   if (HaveVFP) {
     __ fpop(FloatRegisterSet(D0, 16));
     if (VM_Version::has_vfp3_32()) {
@@ -303,58 +190,8 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr
   } else {
     __ pop(RegisterSet(FP));
   }
-#endif // AARCH64
-}
-
-#ifdef AARCH64
-
-static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
-  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
-    __ str_d(D0, Address(SP, -2*wordSize, pre_indexed));
-  } else {
-    __ raw_push(R0, ZR);
-  }
-}
-
-static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
-  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
-    __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed));
-  } else {
-    __ raw_pop(R0, ZR);
-  }
-}
-
-static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
-  __ raw_push(R0, R1);
-  __ raw_push(R2, R3);
-  __ raw_push(R4, R5);
-  __ raw_push(R6, R7);
-
-  assert(FPR_PARAMS == 8, "adjust this code");
-  assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
-
-  if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed));
-  if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed));
-  if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed));
-  if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed));
-}
-
-static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
-  assert(FPR_PARAMS == 8, "adjust this code");
-  assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
-
-  if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed));
-  if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed));
-  if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed));
-  if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed));
-
-  __ raw_pop(R6, R7);
-  __ raw_pop(R4, R5);
-  __ raw_pop(R2, R3);
-  __ raw_pop(R0, R1);
 }
 
-#else // AARCH64
 
 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 #ifdef __ABI_HARD__
@@ -400,7 +237,6 @@ static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments)
   __ pop(RegisterSet(R0, R3));
 }
 
-#endif // AARCH64
 
 
 // Is vector's size (in bytes) bigger than a size saved by default?
@@ -424,73 +260,6 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                         VMRegPair *regs2,
                                         int total_args_passed) {
   assert(regs2 == NULL, "not needed on arm");
-#ifdef AARCH64
-  int slot = 0; // counted in 32-bit VMReg slots
-  int reg = 0;
-  int fp_reg = 0;
-  for (int i = 0; i < total_args_passed; i++) {
-    switch (sig_bt[i]) {
-    case T_SHORT:
-    case T_CHAR:
-    case T_BYTE:
-    case T_BOOLEAN:
-    case T_INT:
-      if (reg < GPR_PARAMS) {
-        Register r = as_Register(reg);
-        regs[i].set1(r->as_VMReg());
-        reg++;
-      } else {
-        regs[i].set1(VMRegImpl::stack2reg(slot));
-        slot+=2;
-      }
-      break;
-    case T_LONG:
-      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
-      // fall through
-    case T_ARRAY:
-    case T_OBJECT:
-    case T_ADDRESS:
-      if (reg < GPR_PARAMS) {
-        Register r = as_Register(reg);
-        regs[i].set2(r->as_VMReg());
-        reg++;
-      } else {
-        regs[i].set2(VMRegImpl::stack2reg(slot));
-        slot+=2;
-      }
-      break;
-    case T_FLOAT:
-      if (fp_reg < FPR_PARAMS) {
-        FloatRegister r = as_FloatRegister(fp_reg);
-        regs[i].set1(r->as_VMReg());
-        fp_reg++;
-      } else {
-        regs[i].set1(VMRegImpl::stack2reg(slot));
-        slot+=2;
-      }
-      break;
-    case T_DOUBLE:
-      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
-      if (fp_reg < FPR_PARAMS) {
-        FloatRegister r = as_FloatRegister(fp_reg);
-        regs[i].set2(r->as_VMReg());
-        fp_reg++;
-      } else {
-        regs[i].set2(VMRegImpl::stack2reg(slot));
-        slot+=2;
-      }
-      break;
-    case T_VOID:
-      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
-      regs[i].set_bad();
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-  }
-  return slot;
-
-#else // AARCH64
 
   int slot = 0;
   int ireg = 0;
@@ -587,17 +356,12 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
     }
   }
   return slot;
-#endif // AARCH64
 }
 
 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                            VMRegPair *regs,
                                            int total_args_passed,
                                            int is_outgoing) {
-#ifdef AARCH64
-  // C calling convention on AArch64 is good enough.
-  return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
-#else
 #ifdef __SOFTFP__
   // soft float is the same as the C calling convention.
   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
@@ -680,7 +444,6 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 
   if (slot & 1) slot++;
   return slot;
-#endif // AARCH64
 }
 
 static void patch_callers_callsite(MacroAssembler *masm) {
@@ -689,27 +452,17 @@ static void patch_callers_callsite(MacroAssembler *masm) {
   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
   __ cbz(Rtemp, skip);
 
-#ifdef AARCH64
-  push_param_registers(masm, FPR_PARAMS);
-  __ raw_push(LR, ZR);
-#else
   // Pushing an even number of registers for stack alignment.
   // Selecting R9, which had to be saved anyway for some platforms.
   __ push(RegisterSet(R0, R3) | R9 | LR);
   __ fpush_hardfp(FloatRegisterSet(D0, 8));
-#endif // AARCH64
 
   __ mov(R0, Rmethod);
   __ mov(R1, LR);
   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 
-#ifdef AARCH64
-  __ raw_pop(LR, ZR);
-  pop_param_registers(masm, FPR_PARAMS);
-#else
   __ fpop_hardfp(FloatRegisterSet(D0, 8));
   __ pop(RegisterSet(R0, R3) | R9 | LR);
-#endif // AARCH64
 
   __ bind(skip);
 }
@@ -736,57 +489,6 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
   __ str(Rmethod, callee_target_addr);
 
-#ifdef AARCH64
-
-  assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod);
-  assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams);
-
-  if (comp_args_on_stack) {
-    __ sub_slow(SP, SP, align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes));
-  }
-
-  for (int i = 0; i < total_args_passed; i++) {
-    if (sig_bt[i] == T_VOID) {
-      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
-      continue;
-    }
-    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
-
-    int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
-    Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
-
-    VMReg r = regs[i].first();
-    bool full_word = regs[i].second()->is_valid();
-
-    if (r->is_stack()) {
-      if (full_word) {
-        __ ldr(tmp, source_addr);
-        __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
-      } else {
-        __ ldr_w(tmp, source_addr);
-        __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
-      }
-    } else if (r->is_Register()) {
-      if (full_word) {
-        __ ldr(r->as_Register(), source_addr);
-      } else {
-        __ ldr_w(r->as_Register(), source_addr);
-      }
-    } else if (r->is_FloatRegister()) {
-      if (sig_bt[i] == T_DOUBLE) {
-        __ ldr_d(r->as_FloatRegister(), source_addr);
-      } else {
-        __ ldr_s(r->as_FloatRegister(), source_addr);
-      }
-    } else {
-      assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
-    }
-  }
-
-  __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
-  __ br(tmp);
-
-#else
 
   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 
@@ -848,7 +550,6 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
   __ ldr(Rmethod, callee_target_addr);
   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 
-#endif // AARCH64
 }
 
 static void gen_c2i_adapter(MacroAssembler *masm,
@@ -863,56 +564,6 @@ static void gen_c2i_adapter(MacroAssembler *masm,
 
   __ mov(Rsender_sp, SP); // not yet saved
 
-#ifdef AARCH64
-
-  int extraspace = align_up(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes);
-  if (extraspace) {
-    __ sub(SP, SP, extraspace);
-  }
-
-  for (int i = 0; i < total_args_passed; i++) {
-    if (sig_bt[i] == T_VOID) {
-      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
-      continue;
-    }
-
-    int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
-    Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
-
-    VMReg r = regs[i].first();
-    bool full_word = regs[i].second()->is_valid();
-
-    if (r->is_stack()) {
-      if (full_word) {
-        __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
-        __ str(tmp, dest_addr);
-      } else {
-        __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
-        __ str_w(tmp, dest_addr);
-      }
-    } else if (r->is_Register()) {
-      if (full_word) {
-        __ str(r->as_Register(), dest_addr);
-      } else {
-        __ str_w(r->as_Register(), dest_addr);
-      }
-    } else if (r->is_FloatRegister()) {
-      if (sig_bt[i] == T_DOUBLE) {
-        __ str_d(r->as_FloatRegister(), dest_addr);
-      } else {
-        __ str_s(r->as_FloatRegister(), dest_addr);
-      }
-    } else {
-      assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
-    }
-  }
-
-  __ mov(Rparams, SP);
-
-  __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset()));
-  __ br(tmp);
-
-#else
 
   int extraspace = total_args_passed * Interpreter::stackElementSize;
   if (extraspace) {
@@ -962,7 +613,6 @@ static void gen_c2i_adapter(MacroAssembler *masm,
 
   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 
-#endif // AARCH64
 }
 
 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
@@ -978,26 +628,17 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
   Label skip_fixup;
   const Register receiver       = R0;
   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
-  const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4);
+  const Register receiver_klass = R4;
 
   __ load_klass(receiver_klass, receiver);
   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
   __ cmp(receiver_klass, holder_klass);
 
-#ifdef AARCH64
-  Label ic_miss;
-  __ b(ic_miss, ne);
-  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
-  __ cbz(Rtemp, skip_fixup);
-  __ bind(ic_miss);
-  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
-#else
   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
   __ cmp(Rtemp, 0, eq);
   __ b(skip_fixup, eq);
   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
-#endif // AARCH64
 
   address c2i_entry = __ pc();
   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
@@ -1199,10 +840,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   __ bind(verified);
   int vep_offset = __ pc() - start;
 
-#ifdef AARCH64
-  // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
-  __ nop();
-#endif // AARCH64
 
   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
     // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
@@ -1215,15 +852,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
       assert(method->is_static(), "method should be static");
       // return 0 for null reference input, return val = R0 = obj_reg = 0
-#ifdef AARCH64
-      Label Continue;
-      __ cbnz(obj_reg, Continue);
-      __ ret();
-      __ bind(Continue);
-#else
       __ cmp(obj_reg, 0);
       __ bx(LR, eq);
-#endif
     }
 
     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
@@ -1236,16 +866,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
       __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
     }
 
-#ifdef AARCH64
-    __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place);
-    __ b(slow_case, eq);
-    __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift);
-    __ ret();
-#else
     __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
     __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
     __ bx(LR, ne);
-#endif // AARCH64
 
     __ bind(slow_case);
   }
@@ -1277,12 +900,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
         assert(i != 0, "Incoming receiver is always in a register");
         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
         __ cmp(Rtemp, 0);
-#ifdef AARCH64
-        __ add(Rtemp, FP, reg2offset_in(src));
-        __ csel(Rtemp, ZR, Rtemp, eq);
-#else
         __ add(Rtemp, FP, reg2offset_in(src), ne);
-#endif // AARCH64
         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
@@ -1295,14 +913,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
         }
         oop_handle_offset += VMRegImpl::slots_per_word;
 
-#ifdef AARCH64
-        __ cmp(src->as_Register(), 0);
-        __ add(Rtemp, SP, offset);
-        __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq);
-        if (dst->is_stack()) {
-          __ str(Rtemp, Address(SP, reg2offset_out(dst)));
-        }
-#else
         if (dst->is_stack()) {
           __ movs(Rtemp, src->as_Register());
           __ add(Rtemp, SP, offset, ne);
@@ -1311,30 +921,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
           __ movs(dst->as_Register(), src->as_Register());
           __ add(dst->as_Register(), SP, offset, ne);
         }
-#endif // AARCH64
       }
     }
 
     case T_VOID:
       break;
 
-#ifdef AARCH64
-    case T_FLOAT:
-    case T_DOUBLE: {
-      VMReg src = in_regs[i].first();
-      VMReg dst = out_regs[i + extra_args].first();
-      if (src->is_stack()) {
-        assert(dst->is_stack(), "must be");
-        __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
-        __ str(Rtemp, Address(SP, reg2offset_out(dst)));
-      } else {
-        assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be");
-        assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be");
-        fp_regs_in_arguments++;
-      }
-      break;
-    }
-#else // AARCH64
 
 #ifdef __SOFTFP__
     case T_DOUBLE:
@@ -1507,7 +1099,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
       break;
     }
 #endif // __ABI_HARD__
-#endif // AARCH64
 
     default: {
       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
@@ -1542,16 +1133,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
   oop_maps->add_gc_map(pc_offset, map);
 
-#ifndef AARCH64
   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
   __ membar(MacroAssembler::StoreStore, Rtemp);
-#endif // !AARCH64
 
   // RedefineClasses() tracing support for obsolete method entry
   if (log_is_enabled(Trace, redefine, class, obsolete)) {
-#ifdef AARCH64
-    __ NOT_TESTED();
-#endif
     __ save_caller_save_registers();
     __ mov(R0, Rthread);
     __ mov_metadata(R1, method());
@@ -1559,10 +1145,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
     __ restore_caller_save_registers();
   }
 
-  const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5);
-  const Register sync_obj    = AARCH64_ONLY(R21) NOT_AARCH64(R6);
-  const Register disp_hdr    = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11);
-  const Register tmp         = AARCH64_ONLY(R23) NOT_AARCH64(R8);
+  const Register sync_handle = R5;
+  const Register sync_obj    = R6;
+  const Register disp_hdr    = altFP_7_11;
+  const Register tmp         = R8;
 
   Label slow_lock, slow_lock_biased, lock_done, fast_lock, leave;
   if (method->is_synchronized()) {
@@ -1576,35 +1162,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
     }
 
     const Register mark = tmp;
-#ifdef AARCH64
-    __ sub(disp_hdr, FP, lock_slot_fp_offset);
-    assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
-
-    __ ldr(mark, sync_obj);
-
-    // Test if object is already locked
-    assert(markOopDesc::unlocked_value == 1, "adjust this code");
-    __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock);
-
-    // Check for recursive lock
-    // See comments in InterpreterMacroAssembler::lock_object for
-    // explanations on the fast recursive locking check.
-    __ mov(Rtemp, SP);
-    __ sub(Rtemp, mark, Rtemp);
-    intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
-    Assembler::LogicalImmediate imm(mask, false);
-    __ ands(Rtemp, Rtemp, imm);
-    __ b(slow_lock, ne);
-
-    // Recursive locking: store 0 into a lock record
-    __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
-    __ b(lock_done);
-
-    __ bind(fast_lock);
-    __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
-
-    __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
-#else
     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
     // That would be acceptable as either CAS or slow case path is taken in that case
 
@@ -1632,7 +1189,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
 
     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
-#endif // AARCH64
 
     __ bind(lock_done);
   }
@@ -1642,13 +1198,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
 
   // Perform thread state transition
   __ mov(Rtemp, _thread_in_native);
-#ifdef AARCH64
-  // stlr instruction is used to force all preceding writes to be observed prior to thread state change
-  __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
-  __ stlr_w(Rtemp, Rtemp2);
-#else
   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
-#endif // AARCH64
 
   // Finally, call the native method
   __ call(method->native_function());
@@ -1713,37 +1263,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
 
-#ifdef AARCH64
-  __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
-  if (CheckJNICalls) {
-    __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
-  }
-
-
-  switch (ret_type) {
-  case T_BOOLEAN:
-    __ tst(R0, 0xff);
-    __ cset(R0, ne);
-    break;
-  case T_CHAR   : __ zero_extend(R0, R0, 16);  break;
-  case T_BYTE   : __ sign_extend(R0, R0,  8);  break;
-  case T_SHORT  : __ sign_extend(R0, R0, 16);  break;
-  case T_INT    : // fall through
-  case T_LONG   : // fall through
-  case T_VOID   : // fall through
-  case T_FLOAT  : // fall through
-  case T_DOUBLE : /* nothing to do */          break;
-  case T_OBJECT : // fall through
-  case T_ARRAY  : break; // See JNIHandles::resolve below
-  default:
-    ShouldNotReachHere();
-  }
-#else
   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
   if (CheckJNICalls) {
     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
   }
-#endif // AARCH64
 
   // Unbox oop result, e.g. JNIHandles::resolve value in R0.
   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
@@ -1756,23 +1279,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
   __ mov(SP, FP);
 
-#ifdef AARCH64
-  Label except;
-  __ cbnz(Rtemp, except);
-  __ raw_pop(FP, LR);
-  __ ret();
-
-  __ bind(except);
-  // Pop the frame and forward the exception. Rexception_pc contains return address.
-  __ raw_pop(FP, Rexception_pc);
-#else
   __ cmp(Rtemp, 0);
   // Pop the frame and return if no exception pending
   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
   // Pop the frame and forward the exception. Rexception_pc contains return address.
   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
-#endif // AARCH64
   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
 
   // Safepoint operation and/or pending suspend request is in progress.
@@ -1852,9 +1364,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
 // activation for use during deoptimization
 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
-#ifdef AARCH64
-  extra_locals_size = align_up(extra_locals_size, StackAlignmentInBytes/BytesPerWord);
-#endif // AARCH64
   return extra_locals_size;
 }
 
@@ -1867,11 +1376,7 @@ uint SharedRuntime::out_preserve_stack_slots() {
 //------------------------------generate_deopt_blob----------------------------
 void SharedRuntime::generate_deopt_blob() {
   ResourceMark rm;
-#ifdef AARCH64
-  CodeBuffer buffer("deopt_blob", 1024+256, 1);
-#else
   CodeBuffer buffer("deopt_blob", 1024, 1024);
-#endif
   int frame_size_in_words;
   OopMapSet* oop_maps;
   int reexecute_offset;
@@ -1880,9 +1385,9 @@ void SharedRuntime::generate_deopt_blob() {
 
   MacroAssembler* masm = new MacroAssembler(&buffer);
   Label cont;
-  const Register Rkind   = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32bit
-  const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
-  const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
+  const Register Rkind   = R9; // caller-saved
+  const Register Rublock = R6;
+  const Register Rsender = altFP_7_11;
   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
 
   address start = __ pc();
@@ -1972,9 +1477,7 @@ void SharedRuntime::generate_deopt_blob() {
   // This frame is going away.  Fetch return value, so we can move it to
   // a new frame.
   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
-#ifndef AARCH64
   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
-#endif // !AARCH64
 #ifndef __SOFTFP__
   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
 #endif
@@ -1986,34 +1489,7 @@ void SharedRuntime::generate_deopt_blob() {
   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
 
-#ifdef AARCH64
-  // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
-  // They are needed for correct stack walking during stack overflow handling.
-  // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
-  __ sub(Rtemp, Rtemp, 2*wordSize);
-  __ add(SP, SP, Rtemp, ex_uxtx);
-  __ raw_pop(FP, LR);
-
-#ifdef ASSERT
-  { Label L;
-    __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
-    __ cmp(FP, Rtemp);
-    __ b(L, eq);
-    __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
-    __ bind(L);
-  }
-  { Label L;
-    __ ldr(Rtemp, Address(R2));
-    __ cmp(LR, Rtemp);
-    __ b(L, eq);
-    __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
-    __ bind(L);
-  }
-#endif // ASSERT
-
-#else
   __ add(SP, SP, Rtemp);
-#endif // AARCH64
 
 #ifdef ASSERT
   // Compilers generate code that bang the stack by as much as the
@@ -2021,7 +1497,6 @@ void SharedRuntime::generate_deopt_blob() {
   // trigger a fault. Verify that it does not on non product builds.
   // See if it is enough stack to push deoptimized frames
   if (UseStackBanging) {
-#ifndef AARCH64
     // The compiled method that we are deoptimizing was popped from the stack.
     // If the stack bang results in a stack overflow, we don't return to the
     // method that is being deoptimized. The stack overflow exception is
@@ -2029,14 +1504,12 @@ void SharedRuntime::generate_deopt_blob() {
     // from the caller in LR and restore FP.
     __ ldr(LR, Address(R2, 0));
     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
-#endif // !AARCH64
     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
     __ arm_stack_overflow_check(R8, Rtemp);
   }
 #endif
   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
 
-#ifndef AARCH64
   // Pick up the initial fp we should save
   // XXX Note: was ldr(FP, Address(FP));
 
@@ -2048,15 +1521,10 @@ void SharedRuntime::generate_deopt_blob() {
   // Deoptimization::fetch_unroll_info computes the right FP value and
   // stores it in Rublock.initial_info. This has been activated for ARM.
   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
-#endif // !AARCH64
 
   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
   __ mov(Rsender, SP);
-#ifdef AARCH64
-  __ sub(SP, SP, Rtemp, ex_uxtx);
-#else
   __ sub(SP, SP, Rtemp);
-#endif // AARCH64
 
   // Push interpreter frames in a loop
   Label loop;
@@ -2068,19 +1536,11 @@ void SharedRuntime::generate_deopt_blob() {
   __ mov(FP, SP);
   __ sub(Rtemp, Rtemp, 2*wordSize);
 
-#ifdef AARCH64
-  __ sub(SP, SP, Rtemp, ex_uxtx);
-#else
   __ sub(SP, SP, Rtemp);
-#endif // AARCH64
 
   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
-#ifdef AARCH64
-  __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
-#else
   __ mov(LR, 0);
   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
-#endif // AARCH64
 
   __ subs(R8, R8, 1);                               // decrement counter
   __ mov(Rsender, SP);
@@ -2094,15 +1554,12 @@ void SharedRuntime::generate_deopt_blob() {
 
   // Restore frame locals after moving the frame
   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
-#ifndef AARCH64
   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
-#endif // !AARCH64
 
 #ifndef __SOFTFP__
   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
 #endif // !__SOFTFP__
 
-#ifndef AARCH64
 #ifdef ASSERT
   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
   { Label L;
@@ -2112,7 +1569,6 @@ void SharedRuntime::generate_deopt_blob() {
     __ stop("Rkind was overwritten");
     __ bind(L);
   }
-#endif
 #endif
 
   // Call unpack_frames with proper arguments
@@ -2130,9 +1586,7 @@ void SharedRuntime::generate_deopt_blob() {
 
   // Collect return values, pop self-frame and jump to interpreter
   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
-#ifndef AARCH64
   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
-#endif // !AARCH64
   // Interpreter floats controlled by __SOFTFP__, but compiler
   // float return value registers controlled by __ABI_HARD__
   // This matters for vfp-sflt builds.
@@ -2149,12 +1603,7 @@ void SharedRuntime::generate_deopt_blob() {
 #endif // !__SOFTFP__
   __ mov(SP, FP);
 
-#ifdef AARCH64
-  __ raw_pop(FP, LR);
-  __ ret();
-#else
   __ pop(RegisterSet(FP) | RegisterSet(PC));
-#endif // AARCH64
 
   __ flush();
 
@@ -2183,8 +1632,8 @@ void SharedRuntime::generate_uncommon_trap_blob() {
 #endif
   // bypassed when code generation useless
   MacroAssembler* masm               = new MacroAssembler(&buffer);
-  const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
-  const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
+  const Register Rublock = R6;
+  const Register Rsender = altFP_7_11;
   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
 
   //
@@ -2240,34 +1689,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
 
-#ifdef AARCH64
-  // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
-  // They are needed for correct stack walking during stack overflow handling.
-  // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
-  __ sub(Rtemp, Rtemp, 2*wordSize);
-  __ add(SP, SP, Rtemp, ex_uxtx);
-  __ raw_pop(FP, LR);
-
-#ifdef ASSERT
-  { Label L;
-    __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
-    __ cmp(FP, Rtemp);
-    __ b(L, eq);
-    __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
-    __ bind(L);
-  }
-  { Label L;
-    __ ldr(Rtemp, Address(R2));
-    __ cmp(LR, Rtemp);
-    __ b(L, eq);
-    __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
-    __ bind(L);
-  }
-#endif // ASSERT
-
-#else
   __ add(SP, SP, Rtemp);
-#endif //AARCH64
 
   // See if it is enough stack to push deoptimized frames
 #ifdef ASSERT
@@ -2275,7 +1697,6 @@ void SharedRuntime::generate_uncommon_trap_blob() {
   // interpreter would need. So this stack banging should never
   // trigger a fault. Verify that it does not on non product builds.
   if (UseStackBanging) {
-#ifndef AARCH64
     // The compiled method that we are deoptimizing was popped from the stack.
     // If the stack bang results in a stack overflow, we don't return to the
     // method that is being deoptimized. The stack overflow exception is
@@ -2283,7 +1704,6 @@ void SharedRuntime::generate_uncommon_trap_blob() {
     // from the caller in LR and restore FP.
     __ ldr(LR, Address(R2, 0));
     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
-#endif // !AARCH64
     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
     __ arm_stack_overflow_check(R8, Rtemp);
   }
@@ -2291,15 +1711,9 @@ void SharedRuntime::generate_uncommon_trap_blob() {
   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
   __ mov(Rsender, SP);
-#ifdef AARCH64
-  __ sub(SP, SP, Rtemp, ex_uxtx);
-#else
   __ sub(SP, SP, Rtemp);
-#endif
-#ifndef AARCH64
   //  __ ldr(FP, Address(FP));
   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
-#endif // AARCH64
 
   // Push interpreter frames in a loop
   Label loop;
@@ -2311,19 +1725,11 @@ void SharedRuntime::generate_uncommon_trap_blob() {
   __ mov(FP, SP);
   __ sub(Rtemp, Rtemp, 2*wordSize);
 
-#ifdef AARCH64
-  __ sub(SP, SP, Rtemp, ex_uxtx);
-#else
   __ sub(SP, SP, Rtemp);
-#endif // AARCH64
 
   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
-#ifdef AARCH64
-  __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
-#else
   __ mov(LR, 0);
   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
-#endif // AARCH64
   __ subs(R8, R8, 1);                               // decrement counter
   __ mov(Rsender, SP);
   __ b(loop, ne);
@@ -2342,12 +1748,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
   __ reset_last_Java_frame(Rtemp);
 
   __ mov(SP, FP);
-#ifdef AARCH64
-  __ raw_pop(FP, LR);
-  __ ret();
-#else
   __ pop(RegisterSet(FP) | RegisterSet(PC));
-#endif
 
   masm->flush();
   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
@@ -2376,12 +1777,8 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
   oop_maps = new OopMapSet();
 
   if (!cause_return) {
-#ifdef AARCH64
-    __ raw_push(LR, LR);
-#else
     __ sub(SP, SP, 4); // make room for LR which may still be live
                        // here if we are coming from a c2 method
-#endif // AARCH64
   }
 
   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
@@ -2406,20 +1803,6 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
   __ cmp(Rtemp, 0);
 
-#ifdef AARCH64
-  RegisterSaver::restore_live_registers(masm, cause_return);
-  Register ret_addr = cause_return ? LR : Rtemp;
-  if (!cause_return) {
-    __ raw_pop(FP, ret_addr);
-  }
-
-  Label throw_exception;
-  __ b(throw_exception, ne);
-  __ br(ret_addr);
-
-  __ bind(throw_exception);
-  __ mov(Rexception_pc, ret_addr);
-#else // AARCH64
   if (!cause_return) {
     RegisterSaver::restore_live_registers(masm, false);
     __ pop(PC, eq);
@@ -2429,7 +1812,6 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
     __ bx(LR, eq);
     __ mov(Rexception_pc, LR);
   }
-#endif // AARCH64
 
   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
 
diff --git a/src/hotspot/cpu/arm/stubGenerator_arm.cpp b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
index 67d9fbc977b..380b015c7b2 100644
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
@@ -85,21 +85,13 @@
 // Hard coded choices (XXX: could be changed to a command line option)
 #define ArmCopyPlatform DEFAULT_ARRAYCOPY_CONFIG
 
-#ifdef AARCH64
-#define ArmCopyCacheLineSize 64
-#else
 #define ArmCopyCacheLineSize 32 // not worth optimizing to 64 according to measured gains
-#endif // AARCH64
-
-// TODO-AARCH64: tune and revise AArch64 arraycopy optimizations
 
 // configuration for each kind of loop
 typedef struct {
   int pld_distance;       // prefetch distance (0 => no prefetch, <0: prefetch_before);
-#ifndef AARCH64
   bool split_ldm;         // if true, split each STM in STMs with fewer registers
   bool split_stm;         // if true, split each LTM in LTMs with fewer registers
-#endif // !AARCH64
 } arraycopy_loop_config;
 
 // configuration for all loops
@@ -114,14 +106,6 @@ typedef struct {
 // configured platforms
 static arraycopy_platform_config arraycopy_configurations[] = {
   // configuration parameters for arraycopy loops
-#ifdef AARCH64
-  {
-    {-256 }, // forward aligned
-    {-128 }, // backward aligned
-    {-256 }, // forward shifted
-    {-128 }  // backward shifted
-  }
-#else
 
   // Configurations were chosen based on manual analysis of benchmark
   // results, minimizing overhead with respect to best results on the
@@ -171,7 +155,6 @@ static arraycopy_platform_config arraycopy_configurations[] = {
     {-160, false, false }, // forward shifted
     {-160, true,  true  } // backward shifted
   }
-#endif // AARCH64
 };
 
 class StubGenerator: public StubCodeGenerator {
@@ -190,100 +173,6 @@ class StubGenerator: public StubCodeGenerator {
     StubCodeMark mark(this, "StubRoutines", "call_stub");
     address start = __ pc();
 
-#ifdef AARCH64
-    const int saved_regs_size = 192;
-
-    __ stp(FP, LR, Address(SP, -saved_regs_size, pre_indexed));
-    __ mov(FP, SP);
-
-    int sp_offset = 16;
-    assert(frame::entry_frame_call_wrapper_offset * wordSize == sp_offset, "adjust this code");
-    __ stp(R0,  ZR,  Address(SP, sp_offset)); sp_offset += 16;
-
-    const int saved_result_and_result_type_offset = sp_offset;
-    __ stp(R1,  R2,  Address(SP, sp_offset)); sp_offset += 16;
-    __ stp(R19, R20, Address(SP, sp_offset)); sp_offset += 16;
-    __ stp(R21, R22, Address(SP, sp_offset)); sp_offset += 16;
-    __ stp(R23, R24, Address(SP, sp_offset)); sp_offset += 16;
-    __ stp(R25, R26, Address(SP, sp_offset)); sp_offset += 16;
-    __ stp(R27, R28, Address(SP, sp_offset)); sp_offset += 16;
-
-    __ stp_d(V8,  V9,  Address(SP, sp_offset)); sp_offset += 16;
-    __ stp_d(V10, V11, Address(SP, sp_offset)); sp_offset += 16;
-    __ stp_d(V12, V13, Address(SP, sp_offset)); sp_offset += 16;
-    __ stp_d(V14, V15, Address(SP, sp_offset)); sp_offset += 16;
-    assert (sp_offset == saved_regs_size, "adjust this code");
-
-    __ mov(Rmethod, R3);
-    __ mov(Rthread, R7);
-    __ reinit_heapbase();
-
-    { // Pass parameters
-      Label done_parameters, pass_parameters;
-
-      __ mov(Rparams, SP);
-      __ cbz_w(R6, done_parameters);
-
-      __ sub(Rtemp, SP, R6, ex_uxtw, LogBytesPerWord);
-      __ align_reg(SP, Rtemp, StackAlignmentInBytes);
-      __ add(Rparams, SP, R6, ex_uxtw, LogBytesPerWord);
-
-      __ bind(pass_parameters);
-      __ subs_w(R6, R6, 1);
-      __ ldr(Rtemp, Address(R5, wordSize, post_indexed));
-      __ str(Rtemp, Address(Rparams, -wordSize, pre_indexed));
-      __ b(pass_parameters, ne);
-
-      __ bind(done_parameters);
-
-#ifdef ASSERT
-      {
-        Label L;
-        __ cmp(SP, Rparams);
-        __ b(L, eq);
-        __ stop("SP does not match Rparams");
-        __ bind(L);
-      }
-#endif
-    }
-
-    __ mov(Rsender_sp, SP);
-    __ blr(R4);
-    return_address = __ pc();
-
-    __ mov(SP, FP);
-
-    __ ldp(R1, R2, Address(SP, saved_result_and_result_type_offset));
-
-    { // Handle return value
-      Label cont;
-      __ str(R0, Address(R1));
-
-      __ cmp_w(R2, T_DOUBLE);
-      __ ccmp_w(R2, T_FLOAT, Assembler::flags_for_condition(eq), ne);
-      __ b(cont, ne);
-
-      __ str_d(V0, Address(R1));
-      __ bind(cont);
-    }
-
-    sp_offset = saved_result_and_result_type_offset + 16;
-    __ ldp(R19, R20, Address(SP, sp_offset)); sp_offset += 16;
-    __ ldp(R21, R22, Address(SP, sp_offset)); sp_offset += 16;
-    __ ldp(R23, R24, Address(SP, sp_offset)); sp_offset += 16;
-    __ ldp(R25, R26, Address(SP, sp_offset)); sp_offset += 16;
-    __ ldp(R27, R28, Address(SP, sp_offset)); sp_offset += 16;
-
-    __ ldp_d(V8,  V9,  Address(SP, sp_offset)); sp_offset += 16;
-    __ ldp_d(V10, V11, Address(SP, sp_offset)); sp_offset += 16;
-    __ ldp_d(V12, V13, Address(SP, sp_offset)); sp_offset += 16;
-    __ ldp_d(V14, V15, Address(SP, sp_offset)); sp_offset += 16;
-    assert (sp_offset == saved_regs_size, "adjust this code");
-
-    __ ldp(FP, LR, Address(SP, saved_regs_size, post_indexed));
-    __ ret();
-
-#else // AARCH64
 
     assert(frame::entry_frame_call_wrapper_offset == 0, "adjust this code");
 
@@ -354,7 +243,6 @@ class StubGenerator: public StubCodeGenerator {
     __ fpop_hardfp(FloatRegisterSet(D8, 8));
     __ pop(RegisterSet(FP) | RegisterSet(PC));
 
-#endif // AARCH64
     return start;
   }
 
@@ -402,7 +290,6 @@ class StubGenerator: public StubCodeGenerator {
   }
 
 
-#ifndef AARCH64
 
   // Integer division shared routine
   //   Input:
@@ -791,7 +678,6 @@ class StubGenerator: public StubCodeGenerator {
   }
 
 
-#endif // AARCH64
 
 #ifdef COMPILER2
   // Support for uint StubRoutine::Arm::partial_subtype_check( Klass sub, Klass super );
@@ -879,12 +765,7 @@ class StubGenerator: public StubCodeGenerator {
 
       // Return failure
       __ bind(L_fail);
-#ifdef AARCH64
-      // count_temp is 0, can't use ZR here
-      __ adds(R0, count_temp, 1); // sets the flags
-#else
       __ movs(R0, 1); // sets the flags
-#endif
       __ raw_pop(saved_set);
       __ ret();
     }
@@ -921,11 +802,7 @@ class StubGenerator: public StubCodeGenerator {
     Label exit, error;
     InlinedAddress verify_oop_count((address) StubRoutines::verify_oop_count_addr());
 
-#ifdef AARCH64
-    __ mrs(flags, Assembler::SysReg_NZCV);
-#else
     __ mrs(Assembler::CPSR, flags);
-#endif // AARCH64
 
     __ ldr_literal(tmp1, verify_oop_count);
     __ ldr_s32(tmp2, Address(tmp1));
@@ -952,11 +829,7 @@ class StubGenerator: public StubCodeGenerator {
     // return if everything seems ok
     __ bind(exit);
 
-#ifdef AARCH64
-    __ msr(Assembler::SysReg_NZCV, flags);
-#else
     __ msr(Assembler::CPSR_f, flags);
-#endif // AARCH64
 
     __ ret();
 
@@ -1002,9 +875,7 @@ class StubGenerator: public StubCodeGenerator {
     const Register to         = R1;
     const Register count      = R2;
     const Register to_from    = tmp1; // to - from
-#ifndef AARCH64
     const Register byte_count = (log2_elem_size == 0) ? count : tmp2; // count << log2_elem_size
-#endif // AARCH64
     assert_different_registers(from, to, count, tmp1, tmp2);
 
     // no_overlap version works if 'to' lower (unsigned) than 'from'
@@ -1012,114 +883,24 @@ class StubGenerator: public StubCodeGenerator {
 
     BLOCK_COMMENT("Array Overlap Test:");
     __ subs(to_from, to, from);
-#ifndef AARCH64
     if (log2_elem_size != 0) {
       __ mov(byte_count, AsmOperand(count, lsl, log2_elem_size));
     }
-#endif // !AARCH64
     if (NOLp == NULL)
       __ b(no_overlap_target,lo);
     else
       __ b((*NOLp), lo);
-#ifdef AARCH64
-    __ subs(ZR, to_from, count, ex_sxtw, log2_elem_size);
-#else
     __ cmp(to_from, byte_count);
-#endif // AARCH64
     if (NOLp == NULL)
       __ b(no_overlap_target, ge);
     else
       __ b((*NOLp), ge);
   }
 
-#ifdef AARCH64
-  // TODO-AARCH64: revise usages of bulk_* methods (probably ldp`s and stp`s should interlace)
-
-  // Loads [from, from + count*wordSize) into regs[0], regs[1], ..., regs[count-1]
-  // and increases 'from' by count*wordSize.
-  void bulk_load_forward(Register from, const Register regs[], int count) {
-    assert (count > 0 && count % 2 == 0, "count must be positive even number");
-    int bytes = count * wordSize;
-
-    int offset = 0;
-    __ ldp(regs[0], regs[1], Address(from, bytes, post_indexed));
-    offset += 2*wordSize;
-
-    for (int i = 2; i < count; i += 2) {
-      __ ldp(regs[i], regs[i+1], Address(from, -bytes + offset));
-      offset += 2*wordSize;
-    }
-
-    assert (offset == bytes, "must be");
-  }
-
-  // Stores regs[0], regs[1], ..., regs[count-1] to [to, to + count*wordSize)
-  // and increases 'to' by count*wordSize.
-  void bulk_store_forward(Register to, const Register regs[], int count) {
-    assert (count > 0 && count % 2 == 0, "count must be positive even number");
-    int bytes = count * wordSize;
-
-    int offset = 0;
-    __ stp(regs[0], regs[1], Address(to, bytes, post_indexed));
-    offset += 2*wordSize;
-
-    for (int i = 2; i < count; i += 2) {
-      __ stp(regs[i], regs[i+1], Address(to, -bytes + offset));
-      offset += 2*wordSize;
-    }
-
-    assert (offset == bytes, "must be");
-  }
-
-  // Loads [from - count*wordSize, from) into regs[0], regs[1], ..., regs[count-1]
-  // and decreases 'from' by count*wordSize.
-  // Note that the word with lowest address goes to regs[0].
-  void bulk_load_backward(Register from, const Register regs[], int count) {
-    assert (count > 0 && count % 2 == 0, "count must be positive even number");
-    int bytes = count * wordSize;
-
-    int offset = 0;
-
-    for (int i = count - 2; i > 0; i -= 2) {
-      offset += 2*wordSize;
-      __ ldp(regs[i], regs[i+1], Address(from, -offset));
-    }
-
-    offset += 2*wordSize;
-    __ ldp(regs[0], regs[1], Address(from, -bytes, pre_indexed));
-
-    assert (offset == bytes, "must be");
-  }
-
-  // Stores regs[0], regs[1], ..., regs[count-1] into [to - count*wordSize, to)
-  // and decreases 'to' by count*wordSize.
-  // Note that regs[0] value goes into the memory with lowest address.
-  void bulk_store_backward(Register to, const Register regs[], int count) {
-    assert (count > 0 && count % 2 == 0, "count must be positive even number");
-    int bytes = count * wordSize;
-
-    int offset = 0;
-
-    for (int i = count - 2; i > 0; i -= 2) {
-      offset += 2*wordSize;
-      __ stp(regs[i], regs[i+1], Address(to, -offset));
-    }
-
-    offset += 2*wordSize;
-    __ stp(regs[0], regs[1], Address(to, -bytes, pre_indexed));
 
-    assert (offset == bytes, "must be");
-  }
-#endif // AARCH64
-
-  // TODO-AARCH64: rearrange in-loop prefetches:
   //   probably we should choose between "prefetch-store before or after store", not "before or after load".
   void prefetch(Register from, Register to, int offset, int to_delta = 0) {
     __ prefetch_read(Address(from, offset));
-#ifdef AARCH64
-  // Next line commented out to avoid significant loss of performance in memory copy - JDK-8078120
-  // __ prfm(pstl1keep, Address(to, offset + to_delta));
-#endif // AARCH64
   }
 
   // Generate the inner loop for forward aligned array copy
@@ -1133,14 +914,14 @@ class StubGenerator: public StubCodeGenerator {
   // Return the minimum initial value for count
   //
   // Notes:
-  // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64)
+  // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA)
   // - 'to' aligned on wordSize
   // - 'count' must be greater or equal than the returned value
   //
   // Increases 'from' and 'to' by count*bytes_per_count.
   //
   // Scratches 'count', R3.
-  // On AArch64 also scratches R4-R10; on 32-bit ARM R4-R10 are preserved (saved/restored).
+  // R4-R10 are preserved (saved/restored).
   //
   int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count) {
     assert (from == R0 && to == R1 && count == R2, "adjust the implementation below");
@@ -1150,7 +931,6 @@ class StubGenerator: public StubCodeGenerator {
     int pld_offset = config->pld_distance;
     const int count_per_loop = bytes_per_loop / bytes_per_count;
 
-#ifndef AARCH64
     bool split_read= config->split_ldm;
     bool split_write= config->split_stm;
 
@@ -1163,7 +943,6 @@ class StubGenerator: public StubCodeGenerator {
     //      BGE NEONCopyPLD
 
     __ push(RegisterSet(R4,R10));
-#endif // !AARCH64
 
     const bool prefetch_before = pld_offset < 0;
     const bool prefetch_after = pld_offset > 0;
@@ -1196,12 +975,7 @@ class StubGenerator: public StubCodeGenerator {
       };
     }
 
-#ifdef AARCH64
-    const Register data_regs[8] = {R3, R4, R5, R6, R7, R8, R9, R10};
-#endif // AARCH64
     {
-      // LDM (32-bit ARM) / LDP (AArch64) copy of 'bytes_per_loop' bytes
-
       // 32-bit ARM note: we have tried implementing loop unrolling to skip one
       // PLD with 64 bytes cache line but the gain was not significant.
 
@@ -1214,9 +988,6 @@ class StubGenerator: public StubCodeGenerator {
         __ BIND(L_skip_pld);
       }
 
-#ifdef AARCH64
-      bulk_load_forward(from, data_regs, 8);
-#else
       if (split_read) {
         // Split the register set in two sets so that there is less
         // latency between LDM and STM (R3-R6 available while R7-R10
@@ -1227,7 +998,6 @@ class StubGenerator: public StubCodeGenerator {
       } else {
         __ ldmia(from, RegisterSet(R3, R10), writeback);
       }
-#endif // AARCH64
 
       __ subs_32(count, count, count_per_loop);
 
@@ -1235,16 +1005,12 @@ class StubGenerator: public StubCodeGenerator {
         prefetch(from, to, pld_offset, bytes_per_loop);
       }
 
-#ifdef AARCH64
-      bulk_store_forward(to, data_regs, 8);
-#else
       if (split_write) {
         __ stmia(to, RegisterSet(R3, R6), writeback);
         __ stmia(to, RegisterSet(R7, R10), writeback);
       } else {
         __ stmia(to, RegisterSet(R3, R10), writeback);
       }
-#endif // AARCH64
 
       __ b(L_copy_loop, ge);
 
@@ -1260,70 +1026,6 @@ class StubGenerator: public StubCodeGenerator {
     // __ add(count, count, ...); // addition useless for the bit tests
     assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
 
-#ifdef AARCH64
-    assert (bytes_per_loop == 64, "adjust the code below");
-    assert (bytes_per_count <= 8, "adjust the code below");
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(32/bytes_per_count), L);
-
-      bulk_load_forward(from, data_regs, 4);
-      bulk_store_forward(to, data_regs, 4);
-
-      __ bind(L);
-    }
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(16/bytes_per_count), L);
-
-      bulk_load_forward(from, data_regs, 2);
-      bulk_store_forward(to, data_regs, 2);
-
-      __ bind(L);
-    }
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(8/bytes_per_count), L);
-
-      __ ldr(R3, Address(from, 8, post_indexed));
-      __ str(R3, Address(to,   8, post_indexed));
-
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 4) {
-      Label L;
-      __ tbz(count, exact_log2(4/bytes_per_count), L);
-
-      __ ldr_w(R3, Address(from, 4, post_indexed));
-      __ str_w(R3, Address(to,   4, post_indexed));
-
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 2) {
-      Label L;
-      __ tbz(count, exact_log2(2/bytes_per_count), L);
-
-      __ ldrh(R3, Address(from, 2, post_indexed));
-      __ strh(R3, Address(to,   2, post_indexed));
-
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 1) {
-      Label L;
-      __ tbz(count, 0, L);
-
-      __ ldrb(R3, Address(from, 1, post_indexed));
-      __ strb(R3, Address(to,   1, post_indexed));
-
-      __ bind(L);
-    }
-#else
     __ tst(count, 16 / bytes_per_count);
     __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
     __ stmia(to, RegisterSet(R3, R6), writeback, ne);
@@ -1351,7 +1053,6 @@ class StubGenerator: public StubCodeGenerator {
     }
 
     __ pop(RegisterSet(R4,R10));
-#endif // AARCH64
 
     return count_per_loop;
   }
@@ -1368,14 +1069,14 @@ class StubGenerator: public StubCodeGenerator {
   // Return the minimum initial value for count
   //
   // Notes:
-  // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64)
+  // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA)
   // - 'end_to' aligned on wordSize
   // - 'count' must be greater or equal than the returned value
   //
   // Decreases 'end_from' and 'end_to' by count*bytes_per_count.
   //
   // Scratches 'count', R3.
-  // On AArch64 also scratches R4-R10; on 32-bit ARM R4-R10 are preserved (saved/restored).
+  // R4-R10 are preserved (saved/restored).
   //
   int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count) {
     assert (end_from == R0 && end_to == R1 && count == R2, "adjust the implementation below");
@@ -1386,14 +1087,12 @@ class StubGenerator: public StubCodeGenerator {
     arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].backward_aligned;
     int pld_offset = config->pld_distance;
 
-#ifndef AARCH64
     bool split_read= config->split_ldm;
     bool split_write= config->split_stm;
 
     // See the forward copy variant for additional comments.
 
     __ push(RegisterSet(R4,R10));
-#endif // !AARCH64
 
     __ sub_32(count, count, count_per_loop);
 
@@ -1419,12 +1118,7 @@ class StubGenerator: public StubCodeGenerator {
       };
     }
 
-#ifdef AARCH64
-    const Register data_regs[8] = {R3, R4, R5, R6, R7, R8, R9, R10};
-#endif // AARCH64
     {
-      // LDM (32-bit ARM) / LDP (AArch64) copy of 'bytes_per_loop' bytes
-
       // 32-bit ARM note: we have tried implementing loop unrolling to skip one
       // PLD with 64 bytes cache line but the gain was not significant.
 
@@ -1437,16 +1131,12 @@ class StubGenerator: public StubCodeGenerator {
         __ BIND(L_skip_pld);
       }
 
-#ifdef AARCH64
-      bulk_load_backward(end_from, data_regs, 8);
-#else
       if (split_read) {
         __ ldmdb(end_from, RegisterSet(R7, R10), writeback);
         __ ldmdb(end_from, RegisterSet(R3, R6), writeback);
       } else {
         __ ldmdb(end_from, RegisterSet(R3, R10), writeback);
       }
-#endif // AARCH64
 
       __ subs_32(count, count, count_per_loop);
 
@@ -1454,16 +1144,12 @@ class StubGenerator: public StubCodeGenerator {
         prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop);
       }
 
-#ifdef AARCH64
-      bulk_store_backward(end_to, data_regs, 8);
-#else
       if (split_write) {
         __ stmdb(end_to, RegisterSet(R7, R10), writeback);
         __ stmdb(end_to, RegisterSet(R3, R6), writeback);
       } else {
         __ stmdb(end_to, RegisterSet(R3, R10), writeback);
       }
-#endif // AARCH64
 
       __ b(L_copy_loop, ge);
 
@@ -1478,70 +1164,6 @@ class StubGenerator: public StubCodeGenerator {
     // __ add(count, count, ...); // addition useless for the bit tests
     assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
 
-#ifdef AARCH64
-    assert (bytes_per_loop == 64, "adjust the code below");
-    assert (bytes_per_count <= 8, "adjust the code below");
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(32/bytes_per_count), L);
-
-      bulk_load_backward(end_from, data_regs, 4);
-      bulk_store_backward(end_to, data_regs, 4);
-
-      __ bind(L);
-    }
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(16/bytes_per_count), L);
-
-      bulk_load_backward(end_from, data_regs, 2);
-      bulk_store_backward(end_to, data_regs, 2);
-
-      __ bind(L);
-    }
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(8/bytes_per_count), L);
-
-      __ ldr(R3, Address(end_from, -8, pre_indexed));
-      __ str(R3, Address(end_to,   -8, pre_indexed));
-
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 4) {
-      Label L;
-      __ tbz(count, exact_log2(4/bytes_per_count), L);
-
-      __ ldr_w(R3, Address(end_from, -4, pre_indexed));
-      __ str_w(R3, Address(end_to,   -4, pre_indexed));
-
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 2) {
-      Label L;
-      __ tbz(count, exact_log2(2/bytes_per_count), L);
-
-      __ ldrh(R3, Address(end_from, -2, pre_indexed));
-      __ strh(R3, Address(end_to,   -2, pre_indexed));
-
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 1) {
-      Label L;
-      __ tbz(count, 0, L);
-
-      __ ldrb(R3, Address(end_from, -1, pre_indexed));
-      __ strb(R3, Address(end_to,   -1, pre_indexed));
-
-      __ bind(L);
-    }
-#else
     __ tst(count, 16 / bytes_per_count);
     __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
     __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne);
@@ -1569,15 +1191,13 @@ class StubGenerator: public StubCodeGenerator {
     }
 
     __ pop(RegisterSet(R4,R10));
-#endif // AARCH64
 
     return count_per_loop;
   }
 
 
   // Generate the inner loop for shifted forward array copy (unaligned copy).
-  // It can be used when bytes_per_count < wordSize, i.e.
-  //  byte/short copy on 32-bit ARM, byte/short/int/compressed-oop copy on AArch64.
+  // It can be used when bytes_per_count < wordSize, i.e. byte/short copy.
   //
   // Arguments
   //      from:      start src address, 64 bits aligned
@@ -1590,11 +1210,11 @@ class StubGenerator: public StubCodeGenerator {
   // Return the minimum initial value for count
   //
   // Notes:
-  // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64)
+  // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA)
   // - 'to' aligned on wordSize
   // - 'count' must be greater or equal than the returned value
   // - 'lsr_shift' + 'lsl_shift' = BitsPerWord
-  // - 'bytes_per_count' is 1 or 2 on 32-bit ARM; 1, 2 or 4 on AArch64
+  // - 'bytes_per_count' is 1 or 2
   //
   // Increases 'to' by count*bytes_per_count.
   //
@@ -1618,10 +1238,8 @@ class StubGenerator: public StubCodeGenerator {
     arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].forward_shifted;
     int pld_offset = config->pld_distance;
 
-#ifndef AARCH64
     bool split_read= config->split_ldm;
     bool split_write= config->split_stm;
-#endif // !AARCH64
 
     const bool prefetch_before = pld_offset < 0;
     const bool prefetch_after = pld_offset > 0;
@@ -1662,12 +1280,6 @@ class StubGenerator: public StubCodeGenerator {
       __ b(L_last_read, lt);
     }
 
-#ifdef AARCH64
-    const Register data_regs[9] = {R3, R4, R5, R6, R7, R8, R9, R10, R12};
-    __ logical_shift_right(R3, R12, lsr_shift); // part of R12 not yet written
-    __ subs_32(count, count, count_per_loop);
-    bulk_load_forward(from, &data_regs[1], 8);
-#else
     // read 32 bytes
     if (split_read) {
       // if write is not split, use less registers in first set to reduce locking
@@ -1682,7 +1294,6 @@ class StubGenerator: public StubCodeGenerator {
       __ ldmia(from, RegisterSet(R4, R10) | R12, writeback); // Note: small latency on R4
       __ subs(count, count, count_per_loop);
     }
-#endif // AARCH64
 
     if (prefetch_after) {
       // do it after the 1st ldm/ldp anyway  (no locking issues with early STM/STP)
@@ -1697,12 +1308,10 @@ class StubGenerator: public StubCodeGenerator {
     __ orr(R5, R5, AsmOperand(R6, lsl, lsl_shift));
     __ logical_shift_right(R6, R6, lsr_shift);
     __ orr(R6, R6, AsmOperand(R7, lsl, lsl_shift));
-#ifndef AARCH64
     if (split_write) {
       // write the first half as soon as possible to reduce stm locking
       __ stmia(to, RegisterSet(R3, R6), writeback, prefetch_before ? gt : ge);
     }
-#endif // !AARCH64
     __ logical_shift_right(R7, R7, lsr_shift);
     __ orr(R7, R7, AsmOperand(R8, lsl, lsl_shift));
     __ logical_shift_right(R8, R8, lsr_shift);
@@ -1712,23 +1321,17 @@ class StubGenerator: public StubCodeGenerator {
     __ logical_shift_right(R10, R10, lsr_shift);
     __ orr(R10, R10, AsmOperand(R12, lsl, lsl_shift));
 
-#ifdef AARCH64
-    bulk_store_forward(to, data_regs, 8);
-#else
     if (split_write) {
       __ stmia(to, RegisterSet(R7, R10), writeback, prefetch_before ? gt : ge);
     } else {
       __ stmia(to, RegisterSet(R3, R10), writeback, prefetch_before ? gt : ge);
     }
-#endif // AARCH64
     __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop)
 
     if (prefetch_before) {
       // the first loop may end earlier, allowing to skip pld at the end
       __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
-#ifndef AARCH64
       __ stmia(to, RegisterSet(R3, R10), writeback); // stmia was skipped
-#endif // !AARCH64
       __ b(L_skip_pld, ge);
       __ adds_32(count, count, ((bytes_per_loop + pld_offset) / bytes_per_count) + count_per_loop);
     }
@@ -1736,90 +1339,6 @@ class StubGenerator: public StubCodeGenerator {
     __ BIND(L_last_read);
     __ b(L_done, eq);
 
-#ifdef AARCH64
-    assert(bytes_per_count < 8, "adjust the code below");
-
-    __ logical_shift_right(R3, R12, lsr_shift);
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(32/bytes_per_count), L);
-      bulk_load_forward(from, &data_regs[1], 4);
-      __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift));
-      __ logical_shift_right(R4, R4, lsr_shift);
-      __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift));
-      __ logical_shift_right(R5, R5, lsr_shift);
-      __ orr(R5, R5, AsmOperand(R6, lsl, lsl_shift));
-      __ logical_shift_right(R6, R6, lsr_shift);
-      __ orr(R6, R6, AsmOperand(R7, lsl, lsl_shift));
-      bulk_store_forward(to, data_regs, 4);
-      __ logical_shift_right(R3, R7, lsr_shift);
-      __ bind(L);
-    }
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(16/bytes_per_count), L);
-      bulk_load_forward(from, &data_regs[1], 2);
-      __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift));
-      __ logical_shift_right(R4, R4, lsr_shift);
-      __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift));
-      bulk_store_forward(to, data_regs, 2);
-      __ logical_shift_right(R3, R5, lsr_shift);
-      __ bind(L);
-    }
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(8/bytes_per_count), L);
-      __ ldr(R4, Address(from, 8, post_indexed));
-      __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift));
-      __ str(R3, Address(to, 8, post_indexed));
-      __ logical_shift_right(R3, R4, lsr_shift);
-      __ bind(L);
-    }
-
-    const int have_bytes = lsl_shift/BitsPerByte; // number of already read bytes in R3
-
-    // It remains less than wordSize to write.
-    // Do not check count if R3 already has maximal number of loaded elements (one less than wordSize).
-    if (have_bytes < wordSize - bytes_per_count) {
-      Label L;
-      __ andr(count, count, (uintx)(8/bytes_per_count-1)); // make count exact
-      __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store?
-      __ b(L, le);
-      __ ldr(R4, Address(from, 8, post_indexed));
-      __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift));
-      __ bind(L);
-    }
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(4/bytes_per_count), L);
-      __ str_w(R3, Address(to, 4, post_indexed));
-      if (bytes_per_count < 4) {
-        __ logical_shift_right(R3, R3, 4*BitsPerByte);
-      }
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 2) {
-      Label L;
-      __ tbz(count, exact_log2(2/bytes_per_count), L);
-      __ strh(R3, Address(to, 2, post_indexed));
-      if (bytes_per_count < 2) {
-        __ logical_shift_right(R3, R3, 2*BitsPerByte);
-      }
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 1) {
-      Label L;
-      __ tbz(count, exact_log2(1/bytes_per_count), L);
-      __ strb(R3, Address(to, 1, post_indexed));
-      __ bind(L);
-    }
-#else
     switch (bytes_per_count) {
     case 2:
       __ mov(R3, AsmOperand(R12, lsr, lsr_shift));
@@ -1902,15 +1421,13 @@ class StubGenerator: public StubCodeGenerator {
       __ strb(R3, Address(to, 1, post_indexed), ne); // one last byte
       break;
     }
-#endif // AARCH64
 
     __ BIND(L_done);
     return 0; // no minimum
   }
 
   // Generate the inner loop for shifted backward array copy (unaligned copy).
-  // It can be used when bytes_per_count < wordSize, i.e.
-  //  byte/short copy on 32-bit ARM, byte/short/int/compressed-oop copy on AArch64.
+  // It can be used when bytes_per_count < wordSize, i.e. for byte/short copy.
   //
   // Arguments
   //      end_from:  end src address, 64 bits aligned
@@ -1923,11 +1440,11 @@ class StubGenerator: public StubCodeGenerator {
   // Return the minimum initial value for count
   //
   // Notes:
-  // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64)
+  // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA)
   // - 'end_to' aligned on wordSize
   // - 'count' must be greater or equal than the returned value
   // - 'lsr_shift' + 'lsl_shift' = 'BitsPerWord'
-  // - 'bytes_per_count' is 1 or 2 on 32-bit ARM; 1, 2 or 4 on AArch64
+  // - 'bytes_per_count' is 1 or 2 on 32-bit ARM
   //
   // Decreases 'end_to' by count*bytes_per_count.
   //
@@ -1951,10 +1468,8 @@ class StubGenerator: public StubCodeGenerator {
     arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].backward_shifted;
     int pld_offset = config->pld_distance;
 
-#ifndef AARCH64
     bool split_read= config->split_ldm;
     bool split_write= config->split_stm;
-#endif // !AARCH64
 
 
     const bool prefetch_before = pld_offset < 0;
@@ -1997,11 +1512,6 @@ class StubGenerator: public StubCodeGenerator {
       __ b(L_last_read, lt);
     }
 
-#ifdef AARCH64
-    __ logical_shift_left(R12, R3, lsl_shift);
-    const Register data_regs[9] = {R3, R4, R5, R6, R7, R8, R9, R10, R12};
-    bulk_load_backward(end_from, data_regs, 8);
-#else
     if (split_read) {
       __ ldmdb(end_from, RegisterSet(R7, R10), writeback);
       __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written
@@ -2010,7 +1520,6 @@ class StubGenerator: public StubCodeGenerator {
       __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written
       __ ldmdb(end_from, RegisterSet(R3, R10), writeback);
     }
-#endif // AARCH64
 
     __ subs_32(count, count, count_per_loop);
 
@@ -2030,35 +1539,27 @@ class StubGenerator: public StubCodeGenerator {
     __ orr(R7, R7, AsmOperand(R6, lsr, lsr_shift));
     __ logical_shift_left(R6, R6, lsl_shift);
     __ orr(R6, R6, AsmOperand(R5, lsr, lsr_shift));
-#ifndef AARCH64
     if (split_write) {
       // store early to reduce locking issues
       __ stmdb(end_to, RegisterSet(R6, R10) | R12, writeback, prefetch_before ? gt : ge);
     }
-#endif // !AARCH64
     __ logical_shift_left(R5, R5, lsl_shift);
     __ orr(R5, R5, AsmOperand(R4, lsr, lsr_shift));
     __ logical_shift_left(R4, R4, lsl_shift);
     __ orr(R4, R4, AsmOperand(R3, lsr, lsr_shift));
 
-#ifdef AARCH64
-    bulk_store_backward(end_to, &data_regs[1], 8);
-#else
     if (split_write) {
       __ stmdb(end_to, RegisterSet(R4, R5), writeback, prefetch_before ? gt : ge);
     } else {
       __ stmdb(end_to, RegisterSet(R4, R10) | R12, writeback, prefetch_before ? gt : ge);
     }
-#endif // AARCH64
 
     __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop)
 
     if (prefetch_before) {
       // the first loop may end earlier, allowing to skip pld at the end
       __ cmn_32(count, ((bytes_per_loop + pld_offset)/bytes_per_count));
-#ifndef AARCH64
       __ stmdb(end_to, RegisterSet(R4, R10) | R12, writeback); // stmdb was skipped
-#endif // !AARCH64
       __ b(L_skip_pld, ge);
       __ adds_32(count, count, ((bytes_per_loop + pld_offset) / bytes_per_count) + count_per_loop);
     }
@@ -2066,99 +1567,6 @@ class StubGenerator: public StubCodeGenerator {
     __ BIND(L_last_read);
     __ b(L_done, eq);
 
-#ifdef AARCH64
-    assert(bytes_per_count < 8, "adjust the code below");
-
-    __ logical_shift_left(R12, R3, lsl_shift);
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(32/bytes_per_count), L);
-      bulk_load_backward(end_from, &data_regs[4], 4);
-
-      __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift));
-      __ logical_shift_left(R10, R10, lsl_shift);
-      __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift));
-      __ logical_shift_left(R9, R9, lsl_shift);
-      __ orr(R9, R9, AsmOperand(R8, lsr, lsr_shift));
-      __ logical_shift_left(R8, R8, lsl_shift);
-      __ orr(R8, R8, AsmOperand(R7, lsr, lsr_shift));
-
-      bulk_store_backward(end_to, &data_regs[5], 4);
-      __ logical_shift_left(R12, R7, lsl_shift);
-      __ bind(L);
-    }
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(16/bytes_per_count), L);
-      bulk_load_backward(end_from, &data_regs[6], 2);
-
-      __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift));
-      __ logical_shift_left(R10, R10, lsl_shift);
-      __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift));
-
-      bulk_store_backward(end_to, &data_regs[7], 2);
-      __ logical_shift_left(R12, R9, lsl_shift);
-      __ bind(L);
-    }
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(8/bytes_per_count), L);
-      __ ldr(R10, Address(end_from, -8, pre_indexed));
-      __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift));
-      __ str(R12, Address(end_to, -8, pre_indexed));
-      __ logical_shift_left(R12, R10, lsl_shift);
-      __ bind(L);
-    }
-
-    const int have_bytes = lsr_shift/BitsPerByte; // number of already read bytes in R12
-
-    // It remains less than wordSize to write.
-    // Do not check count if R12 already has maximal number of loaded elements (one less than wordSize).
-    if (have_bytes < wordSize - bytes_per_count) {
-      Label L;
-      __ andr(count, count, (uintx)(8/bytes_per_count-1)); // make count exact
-      __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store?
-      __ b(L, le);
-      __ ldr(R10, Address(end_from, -8, pre_indexed));
-      __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift));
-      __ bind(L);
-    }
-
-    assert (bytes_per_count <= 4, "must be");
-
-    {
-      Label L;
-      __ tbz(count, exact_log2(4/bytes_per_count), L);
-      __ logical_shift_right(R9, R12, (wordSize-4)*BitsPerByte);
-      __ str_w(R9, Address(end_to, -4, pre_indexed)); // Write 4 MSB
-      if (bytes_per_count < 4) {
-        __ logical_shift_left(R12, R12, 4*BitsPerByte); // Promote remaining bytes to MSB
-      }
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 2) {
-      Label L;
-      __ tbz(count, exact_log2(2/bytes_per_count), L);
-      __ logical_shift_right(R9, R12, (wordSize-2)*BitsPerByte);
-      __ strh(R9, Address(end_to, -2, pre_indexed)); // Write 2 MSB
-      if (bytes_per_count < 2) {
-        __ logical_shift_left(R12, R12, 2*BitsPerByte); // Promote remaining bytes to MSB
-      }
-      __ bind(L);
-    }
-
-    if (bytes_per_count <= 1) {
-      Label L;
-      __ tbz(count, exact_log2(1/bytes_per_count), L);
-      __ logical_shift_right(R9, R12, (wordSize-1)*BitsPerByte);
-      __ strb(R9, Address(end_to, -1, pre_indexed)); // Write 1 MSB
-      __ bind(L);
-    }
-#else
       switch(bytes_per_count) {
       case 2:
       __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written
@@ -2242,7 +1650,6 @@ class StubGenerator: public StubCodeGenerator {
 
       break;
       }
-#endif // AARCH64
 
     __ BIND(L_done);
     return 0; // no minimum
@@ -2257,27 +1664,6 @@ class StubGenerator: public StubCodeGenerator {
     }
   }
 
-#ifdef AARCH64
-  // Loads one 'size_in_bytes'-sized value from 'from' in given direction, i.e.
-  //   if forward:  loads value at from and increases from by size
-  //   if !forward: loads value at from-size_in_bytes and decreases from by size
-  void load_one(Register rd, Register from, int size_in_bytes, bool forward) {
-    assert_different_registers(from, rd);
-    Address addr = get_addr_with_indexing(from, size_in_bytes, forward);
-    __ load_sized_value(rd, addr, size_in_bytes, false);
-  }
-
-  // Stores one 'size_in_bytes'-sized value to 'to' in given direction (see load_one)
-  void store_one(Register rd, Register to, int size_in_bytes, bool forward) {
-    assert_different_registers(to, rd);
-    Address addr = get_addr_with_indexing(to, size_in_bytes, forward);
-    __ store_sized_value(rd, addr, size_in_bytes);
-  }
-#else
-  // load_one and store_one are the same as for AArch64 except for
-  //   *) Support for condition execution
-  //   *) Second value register argument for 8-byte values
-
   void load_one(Register rd, Register from, int size_in_bytes, bool forward, AsmCondition cond = al, Register rd2 = noreg) {
     assert_different_registers(from, rd, rd2);
     if (size_in_bytes < 8) {
@@ -2311,7 +1697,6 @@ class StubGenerator: public StubCodeGenerator {
       }
     }
   }
-#endif // AARCH64
 
   // Copies data from 'from' to 'to' in specified direction to align 'from' by 64 bits.
   // (on 32-bit ARM 64-bit alignment is better for LDM).
@@ -2332,36 +1717,6 @@ class StubGenerator: public StubCodeGenerator {
   // Returns maximum number of bytes which may be copied.
   int align_src(Register from, Register to, Register count, Register tmp, int bytes_per_count, bool forward) {
     assert_different_registers(from, to, count, tmp);
-#ifdef AARCH64
-    // TODO-AARCH64: replace by simple loop?
-    Label Laligned_by_2, Laligned_by_4, Laligned_by_8;
-
-    if (bytes_per_count == 1) {
-      __ tbz(from, 0, Laligned_by_2);
-      __ sub_32(count, count, 1);
-      load_one(tmp, from, 1, forward);
-      store_one(tmp, to, 1, forward);
-    }
-
-    __ BIND(Laligned_by_2);
-
-    if (bytes_per_count <= 2) {
-      __ tbz(from, 1, Laligned_by_4);
-      __ sub_32(count, count, 2/bytes_per_count);
-      load_one(tmp, from, 2, forward);
-      store_one(tmp, to, 2, forward);
-    }
-
-    __ BIND(Laligned_by_4);
-
-    if (bytes_per_count <= 4) {
-      __ tbz(from, 2, Laligned_by_8);
-      __ sub_32(count, count, 4/bytes_per_count);
-      load_one(tmp, from, 4, forward);
-      store_one(tmp, to, 4, forward);
-    }
-    __ BIND(Laligned_by_8);
-#else // AARCH64
     if (bytes_per_count < 8) {
       Label L_align_src;
       __ BIND(L_align_src);
@@ -2374,7 +1729,6 @@ class StubGenerator: public StubCodeGenerator {
         __ b(L_align_src, ne); // if bytes_per_count == 4, then 0 or 1 loop iterations are enough
       }
     }
-#endif // AARCH64
     return 7/bytes_per_count;
   }
 
@@ -2394,19 +1748,6 @@ class StubGenerator: public StubCodeGenerator {
     assert_different_registers(from, to, count, tmp);
 
     __ align(OptoLoopAlignment);
-#ifdef AARCH64
-    Label L_small_array_done, L_small_array_loop;
-    __ BIND(entry);
-    __ cbz_32(count, L_small_array_done);
-
-    __ BIND(L_small_array_loop);
-    __ subs_32(count, count, 1);
-    load_one(tmp, from, bytes_per_count, forward);
-    store_one(tmp, to, bytes_per_count, forward);
-    __ b(L_small_array_loop, gt);
-
-    __ BIND(L_small_array_done);
-#else
     Label L_small_loop;
     __ BIND(L_small_loop);
     store_one(tmp, to, bytes_per_count, forward, al, tmp2);
@@ -2414,7 +1755,6 @@ class StubGenerator: public StubCodeGenerator {
     __ subs(count, count, 1);
     load_one(tmp, from, bytes_per_count, forward, ge, tmp2);
     __ b(L_small_loop, ge);
-#endif // AARCH64
   }
 
   // Aligns 'to' by reading one word from 'from' and writting its part to 'to'.
@@ -2496,7 +1836,7 @@ class StubGenerator: public StubCodeGenerator {
 
     assert (0 < to_remainder && to_remainder < wordSize, "to_remainder is invalid");
 
-    const Register tmp  = forward ? R3 : R12; // TODO-AARCH64: on cojoint_short R4 was used for tmp
+    const Register tmp  = forward ? R3 : R12;
     assert_different_registers(from, to, count, Rval, tmp);
 
     int required_to_align = align_dst(to, count, Rval, tmp, to_remainder, bytes_per_count, forward);
@@ -2530,7 +1870,7 @@ class StubGenerator: public StubCodeGenerator {
   //     shifts 'to' by the number of copied bytes
   //
   // Scratches 'from', 'count', R3 and R12.
-  // On AArch64 also scratches R4-R10, on 32-bit ARM saves them to use.
+  // R4-R10 are pushed on entry and popped on exit, so they are free for use as temporaries.
   int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward) {
 
     const Register Rval = forward ? R12 : R3; // as generate_{forward,backward}_shifted_copy_loop expect
@@ -2540,100 +1880,6 @@ class StubGenerator: public StubCodeGenerator {
     // Note: if {seq} is a sequence of numbers, L{seq} means that if the execution reaches this point,
     // then the remainder of 'to' divided by wordSize is one of elements of {seq}.
 
-#ifdef AARCH64
-    // TODO-AARCH64: simplify, tune
-
-    load_one(Rval, from, wordSize, forward);
-
-    Label L_loop_finished;
-
-    switch (bytes_per_count) {
-      case 4:
-        min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
-        break;
-      case 2:
-      {
-        Label L2, L4, L6;
-
-        __ tbz(to, 1, L4);
-        __ tbz(to, 2, L2);
-
-        __ BIND(L6);
-        int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward);
-        __ b(L_loop_finished);
-
-        __ BIND(L2);
-        int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
-        __ b(L_loop_finished);
-
-        __ BIND(L4);
-        int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
-
-        min_copy = MAX2(MAX2(min_copy2, min_copy4), min_copy6);
-        break;
-      }
-      case 1:
-      {
-        Label L1, L2, L3, L4, L5, L6, L7;
-        Label L15, L26;
-        Label L246;
-
-        __ tbz(to, 0, L246);
-        __ tbz(to, 1, L15);
-        __ tbz(to, 2, L3);
-
-        __ BIND(L7);
-        int min_copy7 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 7, bytes_per_count, forward);
-        __ b(L_loop_finished);
-
-        __ BIND(L246);
-        __ tbnz(to, 1, L26);
-
-        __ BIND(L4);
-        int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward);
-        __ b(L_loop_finished);
-
-        __ BIND(L15);
-        __ tbz(to, 2, L1);
-
-        __ BIND(L5);
-        int min_copy5 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 5, bytes_per_count, forward);
-        __ b(L_loop_finished);
-
-        __ BIND(L3);
-        int min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
-        __ b(L_loop_finished);
-
-        __ BIND(L26);
-        __ tbz(to, 2, L2);
-
-        __ BIND(L6);
-        int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward);
-        __ b(L_loop_finished);
-
-        __ BIND(L1);
-        int min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
-        __ b(L_loop_finished);
-
-        __ BIND(L2);
-        int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
-
-
-        min_copy = MAX2(min_copy1, min_copy2);
-        min_copy = MAX2(min_copy,  min_copy3);
-        min_copy = MAX2(min_copy,  min_copy4);
-        min_copy = MAX2(min_copy,  min_copy5);
-        min_copy = MAX2(min_copy,  min_copy6);
-        min_copy = MAX2(min_copy,  min_copy7);
-        break;
-      }
-      default:
-        ShouldNotReachHere();
-        break;
-    }
-    __ BIND(L_loop_finished);
-
-#else
     __ push(RegisterSet(R4,R10));
     load_one(Rval, from, wordSize, forward);
 
@@ -2690,7 +1936,6 @@ class StubGenerator: public StubCodeGenerator {
     }
 
     __ pop(RegisterSet(R4,R10));
-#endif // AARCH64
 
     return min_copy;
   }
@@ -2772,7 +2017,7 @@ class StubGenerator: public StubCodeGenerator {
 
     Label L_small_array;
     __ cmp_32(count, small_copy_limit);
-    __ b(L_small_array, le); // TODO-AARCH64: le vs lt
+    __ b(L_small_array, le);
 
     // Otherwise proceed with large implementation.
 
@@ -2860,7 +2105,7 @@ class StubGenerator: public StubCodeGenerator {
   //               if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region
   //     count:    total number of copied elements, 32-bit int
   //
-  // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.
+  // Blows all volatile (R0-R3, Rtemp, LR) and 'to', 'count', 'tmp' registers.
   void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward, DecoratorSet decorators) {
     assert_different_registers(to, count, tmp);
 
@@ -2879,12 +2124,7 @@ class StubGenerator: public StubCodeGenerator {
       __ mov(R0, 0); // OK
     }
 
-#ifdef AARCH64
-    __ raw_pop(LR, ZR);
-    __ ret();
-#else
     __ pop(PC);
-#endif // AARCH64
   }
 
 
@@ -2935,11 +2175,7 @@ class StubGenerator: public StubCodeGenerator {
     const int callee_saved_regs = 3; // R0-R2
 
     // LR is used later to save barrier args
-#ifdef AARCH64
-    __ raw_push(LR, ZR);
-#else
     __ push(LR);
-#endif // AARCH64
 
     DecoratorSet decorators = IN_HEAP | IS_ARRAY;
     if (disjoint) {
@@ -3017,13 +2253,8 @@ class StubGenerator: public StubCodeGenerator {
     }
 
     if (!to_is_aligned) {
-      // !to_is_aligned <=> UseCompressedOops && AArch64
       __ BIND(L_unaligned_dst);
-#ifdef AARCH64
-      assert (UseCompressedOops, "unaligned oop array copy may be requested only with UseCompressedOops");
-#else
       ShouldNotReachHere();
-#endif // AARCH64
       int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
       assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
 
@@ -3056,10 +2287,6 @@ class StubGenerator: public StubCodeGenerator {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
-#ifdef AARCH64
-    __ NOT_IMPLEMENTED();
-    start = NULL;
-#else
     const Register tmp = Rtemp;
 
     // bump this on entry, not on exit:
@@ -3081,7 +2308,6 @@ class StubGenerator: public StubCodeGenerator {
     __ jump(StubRoutines::_jshort_arraycopy, relocInfo::runtime_call_type, tmp, eq);
 
     __ jump(StubRoutines::_jbyte_arraycopy, relocInfo::runtime_call_type, tmp);
-#endif
     return start;
   }
 
@@ -3181,7 +2407,7 @@ class StubGenerator: public StubCodeGenerator {
   //      to:    R1
   //      count: R2 treated as signed 32-bit int
   //      ckoff: R3 (super_check_offset)
-  //      ckval: R4 (AArch64) / SP[0] (32-bit ARM) (super_klass)
+  //      ckval: SP[0] (super_klass), loaded into R4 by the stub
   //      ret:   R0 zero for success; (-1^K) where K is partial transfer count (32-bit)
   //
   address generate_checkcast_copy(const char * name) {
@@ -3196,7 +2422,7 @@ class StubGenerator: public StubCodeGenerator {
     const Register R3_ckoff  = R3;      // super_check_offset
     const Register R4_ckval  = R4;      // super_klass
 
-    const int callee_saved_regs = AARCH64_ONLY(5) NOT_AARCH64(4); // LR saved differently
+    const int callee_saved_regs = 4; // LR saved differently
 
     Label load_element, store_element, do_epilogue, fail;
 
@@ -3204,52 +2430,34 @@ class StubGenerator: public StubCodeGenerator {
 
     __ zap_high_non_significant_bits(R2);
 
-#ifdef AARCH64
-    __ raw_push(LR, ZR);
-    __ raw_push(R19, R20);
-#else
     int pushed = 0;
     __ push(LR);
     pushed+=1;
-#endif // AARCH64
 
     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST;
 
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, true, to, count, callee_saved_regs);
 
-#ifndef AARCH64
     const RegisterSet caller_saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11;
     __ push(caller_saved_regs);
     assert(caller_saved_regs.size() == 6, "check the count");
     pushed+=6;
 
     __ ldr(R4_ckval,Address(SP, wordSize*pushed)); // read the argument that was on the stack
-#endif // !AARCH64
 
     // Save arguments for barrier generation (after the pre barrier):
     // - must be a caller saved register and not LR
     // - ARM32: avoid R10 in case RThread is needed
-    const Register saved_count = AARCH64_ONLY(R19) NOT_AARCH64(altFP_7_11);
-#ifdef AARCH64
-    __ mov_w(saved_count, count);
-    __ cbnz_w(count, load_element); // and test count
-#else
+    const Register saved_count = altFP_7_11;
     __ movs(saved_count, count); // and test count
     __ b(load_element,ne);
-#endif // AARCH64
 
     // nothing to copy
     __ mov(R0, 0);
 
-#ifdef AARCH64
-    __ raw_pop(R19, R20);
-    __ raw_pop(LR, ZR);
-    __ ret();
-#else
     __ pop(caller_saved_regs);
     __ pop(PC);
-#endif // AARCH64
 
     // ======== begin loop ========
     // (Loop is rotated; its entry is load_element.)
@@ -3286,7 +2494,7 @@ class StubGenerator: public StubCodeGenerator {
 
     __ BIND(do_epilogue);
 
-    Register copied = AARCH64_ONLY(R20) NOT_AARCH64(R4); // saved
+    Register copied = R4; // saved
     Label L_not_copied;
 
     __ subs_32(copied, saved_count, count); // copied count (in saved reg)
@@ -3302,17 +2510,10 @@ class StubGenerator: public StubCodeGenerator {
     __ BIND(L_not_copied);
     __ cmp_32(copied, saved_count); // values preserved in saved registers
 
-#ifdef AARCH64
-    __ csinv(R0, ZR, copied, eq); // 0 if all copied else NOT(copied)
-    __ raw_pop(R19, R20);
-    __ raw_pop(LR, ZR);
-    __ ret();
-#else
     __ mov(R0, 0, eq); // 0 if all copied
     __ mvn(R0, copied, ne); // else NOT(copied)
     __ pop(caller_saved_regs);
     __ pop(PC);
-#endif // AARCH64
 
     return start;
   }
@@ -3356,7 +2557,7 @@ class StubGenerator: public StubCodeGenerator {
   //    R1    -  src_pos (32-bit int)
   //    R2    -  dst oop
   //    R3    -  dst_pos (32-bit int)
-  //    R4 (AArch64) / SP[0] (32-bit ARM) -  element count (32-bit int)
+  //    SP[0] -  element count (32-bit int)
   //
   //  Output: (32-bit int)
   //    R0 ==  0  -  success
@@ -3374,7 +2575,7 @@ class StubGenerator: public StubCodeGenerator {
     // registers used as temp
     const Register R5_src_klass = R5; // source array klass
     const Register R6_dst_klass = R6; // destination array klass
-    const Register R_lh         = AARCH64_ONLY(R7) NOT_AARCH64(altFP_7_11); // layout handler
+    const Register R_lh         = altFP_7_11; // layout handler
     const Register R8_temp      = R8;
 
     __ align(CodeEntryAlignment);
@@ -3385,21 +2586,17 @@ class StubGenerator: public StubCodeGenerator {
     __ zap_high_non_significant_bits(R3);
     __ zap_high_non_significant_bits(R4);
 
-#ifndef AARCH64
     int pushed = 0;
     const RegisterSet saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11;
     __ push(saved_regs);
     assert(saved_regs.size() == 6, "check the count");
     pushed+=6;
-#endif // !AARCH64
 
     // bump this on entry, not on exit:
     inc_counter_np(SharedRuntime::_generic_array_copy_ctr, R5, R12);
 
     const Register length   = R4;  // elements count
-#ifndef AARCH64
     __ ldr(length, Address(SP,4*pushed));
-#endif // !AARCH64
 
 
     //-----------------------------------------------------------------------
@@ -3492,43 +2689,6 @@ class StubGenerator: public StubCodeGenerator {
       // 'from', 'to', 'count' registers should be set in this order
       // since they are the same as 'src', 'src_pos', 'dst'.
 
-#ifdef AARCH64
-
-      BLOCK_COMMENT("choose copy loop based on element size and scale indexes");
-      Label Lbyte, Lshort, Lint, Llong;
-
-      __ cbz(R12_elsize, Lbyte);
-
-      assert (LogBytesPerShort < LogBytesPerInt && LogBytesPerInt < LogBytesPerLong, "must be");
-      __ cmp(R12_elsize, LogBytesPerInt);
-      __ b(Lint,  eq);
-      __ b(Llong, gt);
-
-      __ BIND(Lshort);
-      __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerShort);
-      __ add_ptr_scaled_int32(to,   dst, dst_pos, LogBytesPerShort);
-      __ mov(count, length);
-      __ b(StubRoutines::_jshort_arraycopy);
-
-      __ BIND(Lint);
-      __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerInt);
-      __ add_ptr_scaled_int32(to,   dst, dst_pos, LogBytesPerInt);
-      __ mov(count, length);
-      __ b(StubRoutines::_jint_arraycopy);
-
-      __ BIND(Lbyte);
-      __ add_ptr_scaled_int32(from, src, src_pos, 0);
-      __ add_ptr_scaled_int32(to,   dst, dst_pos, 0);
-      __ mov(count, length);
-      __ b(StubRoutines::_jbyte_arraycopy);
-
-      __ BIND(Llong);
-      __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerLong);
-      __ add_ptr_scaled_int32(to,   dst, dst_pos, LogBytesPerLong);
-      __ mov(count, length);
-      __ b(StubRoutines::_jlong_arraycopy);
-
-#else // AARCH64
 
       BLOCK_COMMENT("scale indexes to element size");
       __ add(from, src, AsmOperand(src_pos, lsl, R12_elsize));       // src_addr
@@ -3552,7 +2712,6 @@ class StubGenerator: public StubCodeGenerator {
 
       __ b(StubRoutines::_jlong_arraycopy);
 
-#endif // AARCH64
     }
 
     // ObjArrayKlass
@@ -3582,9 +2741,7 @@ class StubGenerator: public StubCodeGenerator {
       __ BIND(L_plain_copy);
       __ mov(count, length);
 
-#ifndef AARCH64
       __ pop(saved_regs); // XXX optim: avoid later push in oop_arraycopy ?
-#endif // !AARCH64
       __ b(StubRoutines::_oop_arraycopy);
     }
 
@@ -3624,28 +2781,24 @@ class StubGenerator: public StubCodeGenerator {
       __ ldr_u32(sco_temp, Address(R6_dst_klass, sco_offset));
       generate_type_check(R5_src_klass, sco_temp, R6_dst_klass,
                           R8_temp, R9,
-                          AARCH64_ONLY(R10) NOT_AARCH64(R12),
+                          R12,
                           L_plain_copy);
 
       // Fetch destination element klass from the ObjArrayKlass header.
       int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
 
       // the checkcast_copy loop needs two extra arguments:
-      const Register Rdst_elem_klass = AARCH64_ONLY(R4) NOT_AARCH64(R3);
+      const Register Rdst_elem_klass = R3;
       __ ldr(Rdst_elem_klass, Address(R6_dst_klass, ek_offset));   // dest elem klass
-#ifndef AARCH64
       __ pop(saved_regs); // XXX optim: avoid later push in oop_arraycopy ?
       __ str(Rdst_elem_klass, Address(SP,0));    // dest elem klass argument
-#endif // !AARCH64
       __ ldr_u32(R3, Address(Rdst_elem_klass, sco_offset));  // sco of elem klass
       __ b(StubRoutines::_checkcast_arraycopy);
     }
 
     __ BIND(L_failed);
 
-#ifndef AARCH64
     __ pop(saved_regs);
-#endif // !AARCH64
     __ mvn(R0, 0); // failure, with 0 copied
     __ ret();
 
@@ -3679,11 +2832,7 @@ class StubGenerator: public StubCodeGenerator {
         break;
 
       case 8: // int64_t
-#ifdef AARCH64
-        __ ldr(R1, Address(R0));
-#else
         Unimplemented();
-#endif // AARCH64
         break;
 
       default:
@@ -3761,467 +2910,8 @@ class StubGenerator: public StubCodeGenerator {
 
   }
 
-#ifndef AARCH64
 #define COMPILE_CRYPTO
 #include "stubRoutinesCrypto_arm.cpp"
-#else
-
-#ifdef COMPILER2
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0   - source byte array address
-  //   c_rarg1   - destination byte array address
-  //   c_rarg2   - K (key) in little endian int array
-  //
-  address generate_aescrypt_encryptBlock() {
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
-
-    Label L_doLast;
-
-    const Register from        = c_rarg0;  // source array address
-    const Register to          = c_rarg1;  // destination array address
-    const Register key         = c_rarg2;  // key array address
-    const Register keylen      = R8;
-
-    address start = __ pc();
-    __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed));
-    __ mov(FP, SP);
-
-    __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-
-    __ vld1(V0, Address(from), MacroAssembler::VELEM_SIZE_8, 128); // get 16 bytes of input
-
-    __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-
-    int quad = 1;
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad);
-    __ aese(V0, V1);
-    __ aesmc(V0, V0);
-    __ aese(V0, V2);
-    __ aesmc(V0, V0);
-    __ aese(V0, V3);
-    __ aesmc(V0, V0);
-    __ aese(V0, V4);
-    __ aesmc(V0, V0);
-
-    __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad);
-    __ aese(V0, V1);
-    __ aesmc(V0, V0);
-    __ aese(V0, V2);
-    __ aesmc(V0, V0);
-    __ aese(V0, V3);
-    __ aesmc(V0, V0);
-    __ aese(V0, V4);
-    __ aesmc(V0, V0);
-
-    __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ cmp_w(keylen, 44);
-    __ b(L_doLast, eq);
-
-    __ aese(V0, V1);
-    __ aesmc(V0, V0);
-    __ aese(V0, V2);
-    __ aesmc(V0, V0);
-
-    __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ cmp_w(keylen, 52);
-    __ b(L_doLast, eq);
-
-    __ aese(V0, V1);
-    __ aesmc(V0, V0);
-    __ aese(V0, V2);
-    __ aesmc(V0, V0);
-
-    __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ BIND(L_doLast);
-
-    __ aese(V0, V1);
-    __ aesmc(V0, V0);
-    __ aese(V0, V2);
-
-    __ vld1(V1, Address(key), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ eor(V0, V0, V1, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128);
-
-    __ mov(R0, 0);
-
-    __ mov(SP, FP);
-    __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed));
-    __ ret(LR);
-
-    return start;
-  }
-
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0   - source byte array address
-  //   c_rarg1   - destination byte array address
-  //   c_rarg2   - K (key) in little endian int array
-  //
-  address generate_aescrypt_decryptBlock() {
-    assert(UseAES, "need AES instructions and misaligned SSE support");
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
-    Label L_doLast;
-
-    const Register from        = c_rarg0;  // source array address
-    const Register to          = c_rarg1;  // destination array address
-    const Register key         = c_rarg2;  // key array address
-    const Register keylen      = R8;
-
-    address start = __ pc();
-    __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed));
-    __ mov(FP, SP);
-
-    __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-
-    __ vld1(V0, Address(from), MacroAssembler::VELEM_SIZE_8, 128); // get 16 bytes of input
-
-    __ vld1(V5, Address(key, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-
-    int quad = 1;
-    __ rev32(V5, V5, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad);
-    __ aesd(V0, V1);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V2);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V3);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V4);
-    __ aesimc(V0, V0);
-
-    __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad);
-    __ aesd(V0, V1);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V2);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V3);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V4);
-    __ aesimc(V0, V0);
-
-    __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ cmp_w(keylen, 44);
-    __ b(L_doLast, eq);
-
-    __ aesd(V0, V1);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V2);
-    __ aesimc(V0, V0);
-
-    __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ cmp_w(keylen, 52);
-    __ b(L_doLast, eq);
-
-    __ aesd(V0, V1);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V2);
-    __ aesimc(V0, V0);
-
-    __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ BIND(L_doLast);
-
-    __ aesd(V0, V1);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V2);
-
-    __ eor(V0, V0, V5, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128);
-
-    __ mov(R0, 0);
-
-    __ mov(SP, FP);
-    __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed));
-    __ ret(LR);
-
-
-    return start;
-  }
-
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0   - source byte array address
-  //   c_rarg1   - destination byte array address
-  //   c_rarg2   - K (key) in little endian int array
-  //   c_rarg3   - r vector byte array address
-  //   c_rarg4   - input length
-  //
-  // Output:
-  //   x0        - input length
-  //
-  address generate_cipherBlockChaining_encryptAESCrypt() {
-    assert(UseAES, "need AES instructions and misaligned SSE support");
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
-
-    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
-
-    const Register from        = c_rarg0;  // source array address
-    const Register to          = c_rarg1;  // destination array address
-    const Register key         = c_rarg2;  // key array address
-    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
-                                           // and left with the results of the last encryption block
-    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
-    const Register keylen      = R8;
-
-    address start = __ pc();
-    __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed));
-    __ mov(FP, SP);
-
-    __ mov(R9, len_reg);
-    __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-
-    __ vld1(V0, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128);
-
-    __ cmp_w(keylen, 52);
-    __ b(L_loadkeys_44, cc);
-    __ b(L_loadkeys_52, eq);
-
-    __ vld1(V17, V18, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-
-    int quad = 1;
-    __ rev32(V17, V17, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V18, V18, MacroAssembler::VELEM_SIZE_8, quad);
-    __ BIND(L_loadkeys_52);
-    __ vld1(V19, V20, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V19, V19, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V20, V20, MacroAssembler::VELEM_SIZE_8, quad);
-    __ BIND(L_loadkeys_44);
-    __ vld1(V21, V22, V23, V24, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V21, V21, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V22, V22, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V23, V23, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V24, V24, MacroAssembler::VELEM_SIZE_8, quad);
-    __ vld1(V25, V26, V27, V28, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V25, V25, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V26, V26, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V27, V27, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V28, V28, MacroAssembler::VELEM_SIZE_8, quad);
-    __ vld1(V29, V30, V31, Address(key), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V29, V29, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V30, V30, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V31, V31, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ BIND(L_aes_loop);
-    __ vld1(V1, Address(from, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ eor(V0, V0, V1, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ b(L_rounds_44, cc);
-    __ b(L_rounds_52, eq);
-
-    __ aese(V0, V17);
-    __ aesmc(V0, V0);
-    __ aese(V0, V18);
-    __ aesmc(V0, V0);
-    __ BIND(L_rounds_52);
-    __ aese(V0, V19);
-    __ aesmc(V0, V0);
-    __ aese(V0, V20);
-    __ aesmc(V0, V0);
-    __ BIND(L_rounds_44);
-    __ aese(V0, V21);
-    __ aesmc(V0, V0);
-    __ aese(V0, V22);
-    __ aesmc(V0, V0);
-    __ aese(V0, V23);
-    __ aesmc(V0, V0);
-    __ aese(V0, V24);
-    __ aesmc(V0, V0);
-    __ aese(V0, V25);
-    __ aesmc(V0, V0);
-    __ aese(V0, V26);
-    __ aesmc(V0, V0);
-    __ aese(V0, V27);
-    __ aesmc(V0, V0);
-    __ aese(V0, V28);
-    __ aesmc(V0, V0);
-    __ aese(V0, V29);
-    __ aesmc(V0, V0);
-    __ aese(V0, V30);
-    __ eor(V0, V0, V31, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ sub(len_reg, len_reg, 16);
-    __ cbnz(len_reg, L_aes_loop);
-
-    __ vst1(V0, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128);
-
-    __ mov(R0, R9);
-
-    __ mov(SP, FP);
-    __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed));
-    __ ret(LR);
-
-    return start;
-  }
-
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0   - source byte array address
-  //   c_rarg1   - destination byte array address
-  //   c_rarg2   - K (key) in little endian int array
-  //   c_rarg3   - r vector byte array address
-  //   c_rarg4   - input length
-  //
-  // Output:
-  //   rax       - input length
-  //
-  address generate_cipherBlockChaining_decryptAESCrypt() {
-    assert(UseAES, "need AES instructions and misaligned SSE support");
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
-
-    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
-
-    const Register from        = c_rarg0;  // source array address
-    const Register to          = c_rarg1;  // destination array address
-    const Register key         = c_rarg2;  // key array address
-    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
-                                           // and left with the results of the last encryption block
-    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
-    const Register keylen      = R8;
-
-    address start = __ pc();
-    __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed));
-    __ mov(FP, SP);
-
-    __ mov(R9, len_reg);
-    __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-
-    __ vld1(V2, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128);
-
-    __ vld1(V31, Address(key, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-
-    int quad = 1;
-    __ rev32(V31, V31, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ cmp_w(keylen, 52);
-    __ b(L_loadkeys_44, cc);
-    __ b(L_loadkeys_52, eq);
-
-    __ vld1(V17, V18, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V17, V17, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V18, V18, MacroAssembler::VELEM_SIZE_8, quad);
-    __ BIND(L_loadkeys_52);
-    __ vld1(V19, V20, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V19, V19, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V20, V20, MacroAssembler::VELEM_SIZE_8, quad);
-    __ BIND(L_loadkeys_44);
-    __ vld1(V21, V22, V23, V24, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V21, V21, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V22, V22, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V23, V23, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V24, V24, MacroAssembler::VELEM_SIZE_8, quad);
-    __ vld1(V25, V26, V27, V28, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V25, V25, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V26, V26, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V27, V27, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V28, V28, MacroAssembler::VELEM_SIZE_8, quad);
-    __ vld1(V29, V30, Address(key), MacroAssembler::VELEM_SIZE_8, 128);
-    __ rev32(V29, V29, MacroAssembler::VELEM_SIZE_8, quad);
-    __ rev32(V30, V30, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ BIND(L_aes_loop);
-    __ vld1(V0, Address(from, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ orr(V1, V0, V0, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ b(L_rounds_44, cc);
-    __ b(L_rounds_52, eq);
-
-    __ aesd(V0, V17);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V17);
-    __ aesimc(V0, V0);
-    __ BIND(L_rounds_52);
-    __ aesd(V0, V19);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V20);
-    __ aesimc(V0, V0);
-    __ BIND(L_rounds_44);
-    __ aesd(V0, V21);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V22);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V23);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V24);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V25);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V26);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V27);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V28);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V29);
-    __ aesimc(V0, V0);
-    __ aesd(V0, V30);
-    __ eor(V0, V0, V31, MacroAssembler::VELEM_SIZE_8, quad);
-    __ eor(V0, V0, V2, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128);
-    __ orr(V2, V1, V1, MacroAssembler::VELEM_SIZE_8, quad);
-
-    __ sub(len_reg, len_reg, 16);
-    __ cbnz(len_reg, L_aes_loop);
-
-    __ vst1(V2, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128);
-
-    __ mov(R0, R9);
-
-    __ mov(SP, FP);
-    __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed));
-    __ ret(LR);
-
-    return start;
-  }
-
-#endif // COMPILER2
-#endif // AARCH64
 
  private:
 
@@ -4294,7 +2984,6 @@ class StubGenerator: public StubCodeGenerator {
     // stub for throwing stack overflow error used both by interpreter and compiler
     StubRoutines::_throw_StackOverflowError_entry  = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
 
-#ifndef AARCH64
     // integer division used both by interpreter and compiler
     StubRoutines::Arm::_idiv_irem_entry = generate_idiv_irem();
 
@@ -4304,7 +2993,6 @@ class StubGenerator: public StubCodeGenerator {
     StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
     StubRoutines::_atomic_load_long_entry = generate_atomic_load_long();
     StubRoutines::_atomic_store_long_entry = generate_atomic_store_long();
-#endif // !AARCH64
   }
 
   void generate_all() {
@@ -4334,24 +3022,10 @@ class StubGenerator: public StubCodeGenerator {
     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                                                    &StubRoutines::_safefetch32_fault_pc,
                                                    &StubRoutines::_safefetch32_continuation_pc);
-#ifdef AARCH64
-    generate_safefetch("SafeFetchN", wordSize, &StubRoutines::_safefetchN_entry,
-                                               &StubRoutines::_safefetchN_fault_pc,
-                                               &StubRoutines::_safefetchN_continuation_pc);
-#ifdef COMPILER2
-    if (UseAESIntrinsics) {
-      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
-      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
-      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
-      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
-    }
-#endif
-#else
     assert (sizeof(int) == wordSize, "32-bit architecture");
     StubRoutines::_safefetchN_entry           = StubRoutines::_safefetch32_entry;
     StubRoutines::_safefetchN_fault_pc        = StubRoutines::_safefetch32_fault_pc;
     StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
-#endif // AARCH64
 
 #ifdef COMPILE_CRYPTO
     // generate AES intrinsics code
diff --git a/src/hotspot/cpu/arm/stubRoutines_arm.cpp b/src/hotspot/cpu/arm/stubRoutines_arm.cpp
index a7e56c105f6..6ae6d1fe8d1 100644
--- a/src/hotspot/cpu/arm/stubRoutines_arm.cpp
+++ b/src/hotspot/cpu/arm/stubRoutines_arm.cpp
@@ -27,13 +27,9 @@
 #include "runtime/frame.inline.hpp"
 #include "runtime/stubRoutines.hpp"
 
-#ifndef AARCH64
 address StubRoutines::Arm::_idiv_irem_entry = NULL;
-#endif
 
 address StubRoutines::Arm::_partial_subtype_check = NULL;
 
-#ifndef AARCH64
 address StubRoutines::_atomic_load_long_entry = NULL;
 address StubRoutines::_atomic_store_long_entry = NULL;
-#endif
diff --git a/src/hotspot/cpu/arm/stubRoutines_arm.hpp b/src/hotspot/cpu/arm/stubRoutines_arm.hpp
index a94897a4dd8..2c86038f456 100644
--- a/src/hotspot/cpu/arm/stubRoutines_arm.hpp
+++ b/src/hotspot/cpu/arm/stubRoutines_arm.hpp
@@ -40,16 +40,12 @@ class Arm {
 
  private:
 
-#ifndef AARCH64
   static address _idiv_irem_entry;
-#endif
   static address _partial_subtype_check;
 
  public:
 
-#ifndef AARCH64
   static address idiv_irem_entry() { return _idiv_irem_entry; }
-#endif
   static address partial_subtype_check() { return _partial_subtype_check; }
 };
 
@@ -57,13 +53,11 @@ class Arm {
     return return_pc == _call_stub_return_address;
   }
 
-#ifndef AARCH64
   static address _atomic_load_long_entry;
   static address _atomic_store_long_entry;
 
   static address atomic_load_long_entry()                  { return _atomic_load_long_entry; }
   static address atomic_store_long_entry()                 { return _atomic_store_long_entry; }
-#endif
 
 
 #endif // CPU_ARM_VM_STUBROUTINES_ARM_HPP
diff --git a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
index c96f4eafc57..cca60c42a97 100644
--- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
@@ -65,7 +65,7 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() {
   address entry = __ pc();
 
   // callee-save register for saving LR, shared with generate_native_entry
-  const Register Rsaved_ret_addr = AARCH64_ONLY(R21) NOT_AARCH64(Rtmp_save0);
+  const Register Rsaved_ret_addr = Rtmp_save0;
 
   __ mov(Rsaved_ret_addr, LR);
 
@@ -73,24 +73,6 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() {
   __ mov(R2, Rlocals);
   __ mov(R3, SP);
 
-#ifdef AARCH64
-  // expand expr. stack and extended SP to avoid cutting SP in call_VM
-  __ mov(Rstack_top, SP);
-  __ str(Rstack_top, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
-  __ check_stack_top();
-
-  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), R1, R2, R3, false);
-
-  __ ldp(ZR,      c_rarg1, Address(SP, 2*wordSize, post_indexed));
-  __ ldp(c_rarg2, c_rarg3, Address(SP, 2*wordSize, post_indexed));
-  __ ldp(c_rarg4, c_rarg5, Address(SP, 2*wordSize, post_indexed));
-  __ ldp(c_rarg6, c_rarg7, Address(SP, 2*wordSize, post_indexed));
-
-  __ ldp_d(V0, V1, Address(SP, 2*wordSize, post_indexed));
-  __ ldp_d(V2, V3, Address(SP, 2*wordSize, post_indexed));
-  __ ldp_d(V4, V5, Address(SP, 2*wordSize, post_indexed));
-  __ ldp_d(V6, V7, Address(SP, 2*wordSize, post_indexed));
-#else
 
   // Safer to save R9 (when scratched) since callers may have been
   // written assuming R9 survives. This is suboptimal but
@@ -110,7 +92,6 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() {
   // eliminate any gain imposed by avoiding 8 double word loads.
   __ fldmiad(SP, FloatRegisterSet(D0, 8), writeback);
 #endif // __ABI_HARD__
-#endif // AARCH64
 
   __ ret(Rsaved_ret_addr);
 
@@ -129,10 +110,6 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() {
 address TemplateInterpreterGenerator::generate_abstract_entry(void) {
   address entry_point = __ pc();
 
-#ifdef AARCH64
-  __ restore_sp_after_call(Rtemp);
-  __ restore_stack_top();
-#endif
 
   __ empty_expression_stack();
 
@@ -274,16 +251,11 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
 
   __ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
 
-#ifdef AARCH64
-  __ restore_sp_after_call(Rtemp);  // Restore SP to extended SP
-  __ restore_stack_top();
-#else
   // Restore stack bottom in case i2c adjusted stack
   __ ldr(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
   // and NULL it as marker that SP is now tos until next java call
   __ mov(Rtemp, (int)NULL_WORD);
   __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
-#endif // AARCH64
 
   __ restore_method();
   __ restore_bcp();
@@ -299,9 +271,7 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
   __ check_stack_top();
   __ add(Rstack_top, Rstack_top, AsmOperand(Rtemp, lsl, Interpreter::logStackElementSize));
 
-#ifndef AARCH64
   __ convert_retval_to_tos(state);
-#endif // !AARCH64
 
  __ check_and_handle_popframe();
  __ check_and_handle_earlyret();
@@ -317,15 +287,10 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, i
 
   __ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
 
-#ifdef AARCH64
-  __ restore_sp_after_call(Rtemp);  // Restore SP to extended SP
-  __ restore_stack_top();
-#else
   // The stack is not extended by deopt but we must NULL last_sp as this
   // entry is like a "return".
   __ mov(Rtemp, 0);
   __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
-#endif // AARCH64
 
   __ restore_method();
   __ restore_bcp();
@@ -354,9 +319,9 @@ address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type
   address entry = __ pc();
   switch (type) {
     case T_BOOLEAN: __ c2bool(R0); break;
-    case T_CHAR   : AARCH64_ONLY(__ zero_extend(R0, R0, 16);)  break;
-    case T_BYTE   : AARCH64_ONLY(__ sign_extend(R0, R0,  8);)  break;
-    case T_SHORT  : AARCH64_ONLY(__ sign_extend(R0, R0, 16);)  break;
+    case T_CHAR   : break;
+    case T_BYTE   : break;
+    case T_SHORT  : break;
     case T_INT    : // fall through
     case T_LONG   : // fall through
     case T_VOID   : // fall through
@@ -453,11 +418,7 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow,
 
     __ add(Ricnt, Ricnt, InvocationCounter::count_increment);
 
-#ifdef AARCH64
-    __ andr(Rbcnt, Rbcnt, (unsigned int)InvocationCounter::count_mask_value); // mask out the status bits
-#else
     __ bic(Rbcnt, Rbcnt, ~InvocationCounter::count_mask_value); // mask out the status bits
-#endif // AARCH64
 
     __ str_32(Ricnt, invocation_counter);            // save invocation count
     __ add(Ricnt, Ricnt, Rbcnt);                     // add both counters
@@ -509,13 +470,12 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {
   // Registers on entry:
   //
   // R3 = number of additional locals
-  // R11 = max expression stack slots (AArch64 only)
   // Rthread
   // Rmethod
   // Registers used: R0, R1, R2, Rtemp.
 
   const Register Radditional_locals = R3;
-  const Register RmaxStack = AARCH64_ONLY(R11) NOT_AARCH64(R2);
+  const Register RmaxStack = R2;
 
   // monitor entry size
   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
@@ -532,10 +492,8 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {
 
   __ ldr(R0, Address(Rthread, Thread::stack_base_offset()));
   __ ldr(R1, Address(Rthread, Thread::stack_size_offset()));
-#ifndef AARCH64
   __ ldr(Rtemp, Address(Rmethod, Method::const_offset()));
   __ ldrh(RmaxStack, Address(Rtemp, ConstMethod::max_stack_offset()));
-#endif // !AARCH64
   __ sub_slow(Rtemp, SP, overhead_size + reserved_pages + guard_pages + Method::extra_stack_words());
 
   // reserve space for additional locals
@@ -549,16 +507,8 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {
 
   __ cmp(Rtemp, R0);
 
-#ifdef AARCH64
-  Label L;
-  __ b(L, hi);
-  __ mov(SP, Rsender_sp);  // restore SP
-  __ b(StubRoutines::throw_StackOverflowError_entry());
-  __ bind(L);
-#else
   __ mov(SP, Rsender_sp, ls);  // restore SP
   __ b(StubRoutines::throw_StackOverflowError_entry(), ls);
-#endif // AARCH64
 }
 
 
@@ -582,26 +532,15 @@ void TemplateInterpreterGenerator::lock_method() {
   // get synchronization object
   { Label done;
     __ ldr_u32(Rtemp, Address(Rmethod, Method::access_flags_offset()));
-#ifdef AARCH64
-    __ ldr(R0, Address(Rlocals, Interpreter::local_offset_in_bytes(0))); // get receiver (assume this is frequent case)
-    __ tbz(Rtemp, JVM_ACC_STATIC_BIT, done);
-#else
     __ tst(Rtemp, JVM_ACC_STATIC);
     __ ldr(R0, Address(Rlocals, Interpreter::local_offset_in_bytes(0)), eq); // get receiver (assume this is frequent case)
     __ b(done, eq);
-#endif // AARCH64
     __ load_mirror(R0, Rmethod, Rtemp);
     __ bind(done);
   }
 
   // add space for monitor & lock
 
-#ifdef AARCH64
-  __ check_extended_sp(Rtemp);
-  __ sub(SP, SP, entry_size);                  // adjust extended SP
-  __ mov(Rtemp, SP);
-  __ str(Rtemp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
-#endif // AARCH64
 
   __ sub(Rstack_top, Rstack_top, entry_size);
   __ check_stack_top_on_expansion();
@@ -614,90 +553,6 @@ void TemplateInterpreterGenerator::lock_method() {
   __ lock_object(R1);
 }
 
-#ifdef AARCH64
-
-//
-// Generate a fixed interpreter frame. This is identical setup for interpreted methods
-// and for native methods hence the shared code.
-//
-// On entry:
-//   R10 = ConstMethod
-//   R11 = max expr. stack (in slots), if !native_call
-//
-// On exit:
-//   Rbcp, Rstack_top are initialized, SP is extended
-//
-void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
-  // Incoming registers
-  const Register RconstMethod = R10;
-  const Register RmaxStack = R11;
-  // Temporary registers
-  const Register RextendedSP = R0;
-  const Register Rcache = R1;
-  const Register Rmdp = ProfileInterpreter ? R2 : ZR;
-
-  // Generates the following stack layout (stack grows up in this picture):
-  //
-  // [ expr. stack bottom ]
-  // [ saved Rbcp         ]
-  // [ current Rlocals    ]
-  // [ cache              ]
-  // [ mdx                ]
-  // [ mirror             ]
-  // [ Method*            ]
-  // [ extended SP        ]
-  // [ expr. stack top    ]
-  // [ sender_sp          ]
-  // [ saved FP           ] <--- FP
-  // [ saved LR           ]
-
-  // initialize fixed part of activation frame
-  __ stp(FP, LR, Address(SP, -2*wordSize, pre_indexed));
-  __ mov(FP, SP);                                     // establish new FP
-
-  // setup Rbcp
-  if (native_call) {
-    __ mov(Rbcp, ZR);                                 // bcp = 0 for native calls
-  } else {
-    __ add(Rbcp, RconstMethod, in_bytes(ConstMethod::codes_offset())); // get codebase
-  }
-
-  // Rstack_top & RextendedSP
-  __ sub(Rstack_top, SP, 10*wordSize);
-  if (native_call) {
-    __ sub(RextendedSP, Rstack_top, align_up(wordSize, StackAlignmentInBytes));    // reserve 1 slot for exception handling
-  } else {
-    __ sub(RextendedSP, Rstack_top, AsmOperand(RmaxStack, lsl, Interpreter::logStackElementSize));
-    __ align_reg(RextendedSP, RextendedSP, StackAlignmentInBytes);
-  }
-  __ mov(SP, RextendedSP);
-  __ check_stack_top();
-
-  // Load Rmdp
-  if (ProfileInterpreter) {
-    __ ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
-    __ tst(Rtemp, Rtemp);
-    __ add(Rtemp, Rtemp, in_bytes(MethodData::data_offset()));
-    __ csel(Rmdp, ZR, Rtemp, eq);
-  }
-
-  // Load Rcache
-  __ ldr(Rtemp, Address(RconstMethod, ConstMethod::constants_offset()));
-  __ ldr(Rcache, Address(Rtemp, ConstantPool::cache_offset_in_bytes()));
-  // Get mirror and store it in the frame as GC root for this Method*
-  __ load_mirror(Rtemp, Rmethod, Rtemp);
-
-  // Build fixed frame
-  __ stp(Rstack_top, Rbcp, Address(FP, -10*wordSize));
-  __ stp(Rlocals, Rcache,  Address(FP,  -8*wordSize));
-  __ stp(Rmdp, Rtemp,          Address(FP,  -6*wordSize));
-  __ stp(Rmethod, RextendedSP, Address(FP,  -4*wordSize));
-  __ stp(ZR, Rsender_sp,   Address(FP,  -2*wordSize));
-  assert(frame::interpreter_frame_initial_sp_offset == -10, "interpreter frame broken");
-  assert(frame::interpreter_frame_stack_top_offset  == -2, "stack top broken");
-}
-
-#else // AARCH64
 
 //
 // Generate a fixed interpreter frame. This is identical setup for interpreted methods
@@ -759,7 +614,6 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
   __ str(SP, Address(SP, 0));                          // set expression stack bottom
 }
 
-#endif // AARCH64
 
 // End of helpers
 
@@ -788,7 +642,6 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
 //
 // Stack layout immediately at entry
 //
-// [ optional padding(*)] <--- SP (AArch64)
 // [ parameter n        ] <--- Rparams (SP on 32-bit ARM)
 //   ...
 // [ parameter 1        ]
@@ -802,7 +655,6 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
 // local variables follow incoming parameters immediately; i.e.
 // the return address is saved at the end of the locals.
 //
-// [ reserved stack (*) ] <--- SP (AArch64)
 // [ expr. stack        ] <--- Rstack_top (SP on 32-bit ARM)
 // [ monitor entry      ]
 //   ...
@@ -818,10 +670,6 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
 // 32-bit ARM:
 // [ last_sp            ]
 //
-// AArch64:
-// [ extended SP (*)    ]
-// [ stack top (*)      ]
-//
 // [ sender_sp          ]
 // [ saved FP           ] <--- FP
 // [ saved LR           ]
@@ -833,8 +681,6 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
 //   ...
 // [ parameter 1        ] <--- Rlocals
 //
-// (*) - AArch64 only
-//
 
 address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
   // Code: _aload_0, _getfield, _areturn
@@ -911,29 +757,18 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
   address entry_point = __ pc();
 
   // Register allocation
-  const Register Rsize_of_params = AARCH64_ONLY(R20) NOT_AARCH64(R6);
-  const Register Rsig_handler    = AARCH64_ONLY(R21) NOT_AARCH64(Rtmp_save0 /* R4 */);
-  const Register Rnative_code    = AARCH64_ONLY(R22) NOT_AARCH64(Rtmp_save1 /* R5 */);
-  const Register Rresult_handler = AARCH64_ONLY(Rsig_handler) NOT_AARCH64(R6);
-
-#ifdef AARCH64
-  const Register RconstMethod = R10; // also used in generate_fixed_frame (should match)
-  const Register Rsaved_result = Rnative_code;
-  const FloatRegister Dsaved_result = V8;
-#else
+  const Register Rsize_of_params = R6;
+  const Register Rsig_handler    = Rtmp_save0;   // R4
+  const Register Rnative_code    = Rtmp_save1;   // R5
+  const Register Rresult_handler = R6;
+
   const Register Rsaved_result_lo = Rtmp_save0;  // R4
   const Register Rsaved_result_hi = Rtmp_save1;  // R5
   FloatRegister saved_result_fp;
-#endif // AARCH64
 
 
-#ifdef AARCH64
-  __ ldr(RconstMethod, Address(Rmethod, Method::const_offset()));
-  __ ldrh(Rsize_of_params,  Address(RconstMethod, ConstMethod::size_of_parameters_offset()));
-#else
   __ ldr(Rsize_of_params, Address(Rmethod, Method::const_offset()));
   __ ldrh(Rsize_of_params,  Address(Rsize_of_params, ConstMethod::size_of_parameters_offset()));
-#endif // AARCH64
 
   // native calls don't need the stack size check since they have no expression stack
   // and the arguments are already on the stack and we only add a handful of words
@@ -943,19 +778,9 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
   __ sub(Rlocals, Rparams, wordSize);
   __ add(Rlocals, Rlocals, AsmOperand(Rsize_of_params, lsl, Interpreter::logStackElementSize));
 
-#ifdef AARCH64
-  int extra_stack_reserve = 2*wordSize; // extra space for oop_temp
-  if(__ can_post_interpreter_events()) {
-    // extra space for saved results
-    extra_stack_reserve += 2*wordSize;
-  }
-  // reserve extra stack space and nullify oop_temp slot
-  __ stp(ZR, ZR, Address(SP, -extra_stack_reserve, pre_indexed));
-#else
   // reserve stack space for oop_temp
   __ mov(R0, 0);
   __ push(R0);
-#endif // AARCH64
 
   generate_fixed_frame(true); // Note: R9 is now saved in the frame
 
@@ -1051,15 +876,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
 
   // Allocate stack space for arguments
 
-#ifdef AARCH64
-  __ sub(Rtemp, SP, Rsize_of_params, ex_uxtw, LogBytesPerWord);
-  __ align_reg(SP, Rtemp, StackAlignmentInBytes);
-
-  // Allocate more stack space to accomodate all arguments passed on GP and FP registers:
-  // 8 * wordSize for GPRs
-  // 8 * wordSize for FPRs
-  int reg_arguments = align_up(8*wordSize + 8*wordSize, StackAlignmentInBytes);
-#else
 
   // C functions need aligned stack
   __ bic(SP, SP, StackAlignmentInBytes - 1);
@@ -1079,12 +895,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
   // It is also used for JNIEnv & class additional parameters.
   int reg_arguments = 4 * wordSize;
 #endif // __ABI_HARD__
-#endif // AARCH64
 
   __ sub(SP, SP, reg_arguments);
 
 
-  // Note: signature handler blows R4 (32-bit ARM) or R21 (AArch64) besides all scratch registers.
+  // Note: signature handler blows R4 besides all scratch registers.
   // See AbstractInterpreterGenerator::generate_slow_signature_handler().
   __ call(Rsig_handler);
 #if R9_IS_SCRATCHED
@@ -1120,18 +935,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
   }
 #endif
 
-#ifdef AARCH64
-  __ mov(Rtemp, _thread_in_native);
-  __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
-  // STLR is used to force all preceding writes to be observed prior to thread state change
-  __ stlr_w(Rtemp, Rtemp2);
-#else
   // Force all preceding writes to be observed prior to thread state change
   __ membar(MacroAssembler::StoreStore, Rtemp);
 
   __ mov(Rtemp, _thread_in_native);
   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
-#endif // AARCH64
 
   __ call(Rnative_code);
 #if R9_IS_SCRATCHED
@@ -1153,10 +961,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
   __ ldr_global_s32(Rtemp, SafepointSynchronize::address_of_state());
 
   // Protect the return value in the interleaved code: save it to callee-save registers.
-#ifdef AARCH64
-  __ mov(Rsaved_result, R0);
-  __ fmov_d(Dsaved_result, D0);
-#else
   __ mov(Rsaved_result_lo, R0);
   __ mov(Rsaved_result_hi, R1);
 #ifdef __ABI_HARD__
@@ -1166,26 +970,17 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
 #else
   saved_result_fp = fnoreg;
 #endif // __ABI_HARD__
-#endif // AARCH64
 
   {
     __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
     __ cmp(Rtemp, SafepointSynchronize::_not_synchronized);
     __ cond_cmp(R3, 0, eq);
 
-#ifdef AARCH64
-    Label L;
-    __ b(L, eq);
-    __ mov(R0, Rthread);
-    __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::none);
-    __ bind(L);
-#else
   __ mov(R0, Rthread, ne);
   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::none, ne);
 #if R9_IS_SCRATCHED
   __ restore_method();
 #endif
-#endif // AARCH64
   }
 
   // Perform Native->Java thread transition
@@ -1206,7 +1001,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
     __ mov_slow(Rtemp, AbstractInterpreter::result_handler(T_OBJECT));
     __ cmp(Rresult_handler, Rtemp);
     __ b(Lnot_oop, ne);
-    Register value = AARCH64_ONLY(Rsaved_result) NOT_AARCH64(Rsaved_result_lo);
+    Register value = Rsaved_result_lo;
     __ resolve_jobject(value,   // value
                        Rtemp,   // tmp1
                        R1_tmp); // tmp2
@@ -1215,43 +1010,23 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
     __ bind(Lnot_oop);
   }
 
-#ifdef AARCH64
-  // Restore SP (drop native parameters area), to keep SP in sync with extended_sp in frame
-  __ restore_sp_after_call(Rtemp);
-  __ check_stack_top();
-#endif // AARCH64
 
   // reguard stack if StackOverflow exception happened while in native.
   {
     __ ldr_u32(Rtemp, Address(Rthread, JavaThread::stack_guard_state_offset()));
     __ cmp_32(Rtemp, JavaThread::stack_guard_yellow_reserved_disabled);
-#ifdef AARCH64
-    Label L;
-    __ b(L, ne);
-    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::none);
-    __ bind(L);
-#else
   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::none, eq);
 #if R9_IS_SCRATCHED
   __ restore_method();
 #endif
-#endif // AARCH64
   }
 
   // check pending exceptions
   {
     __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
-#ifdef AARCH64
-    Label L;
-    __ cbz(Rtemp, L);
-    __ mov_pc_to(Rexception_pc);
-    __ b(StubRoutines::forward_exception_entry());
-    __ bind(L);
-#else
     __ cmp(Rtemp, 0);
     __ mov(Rexception_pc, PC, ne);
     __ b(StubRoutines::forward_exception_entry(), ne);
-#endif // AARCH64
   }
 
   if (synchronized) {
@@ -1265,19 +1040,9 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
   //       the exception handler code notifies the runtime of method exits
   //       too. If this happens before, method entry/exit notifications are
   //       not properly paired (was bug - gri 11/22/99).
-#ifdef AARCH64
-  __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI, true, Rsaved_result, noreg, Dsaved_result);
-#else
   __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI, true, Rsaved_result_lo, Rsaved_result_hi, saved_result_fp);
-#endif // AARCH64
 
   // Restore the result. Oop result is restored from the stack.
-#ifdef AARCH64
-  __ mov(R0, Rsaved_result);
-  __ fmov_d(D0, Dsaved_result);
-
-  __ blr(Rresult_handler);
-#else
   __ mov(R0, Rsaved_result_lo);
   __ mov(R1, Rsaved_result_hi);
 
@@ -1286,18 +1051,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
   __ fcpyd(D0, D8);
 #endif // __ABI_HARD__
   __ blx(Rresult_handler);
-#endif // AARCH64
 
   // Restore FP/LR, sender_sp and return
-#ifdef AARCH64
-  __ ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
-  __ ldp(FP, LR, Address(FP));
-  __ mov(SP, Rtemp);
-#else
   __ mov(Rtemp, FP);
   __ ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
   __ ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize));
-#endif // AARCH64
 
   __ ret();
 
@@ -1324,12 +1082,8 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
 
   address entry_point = __ pc();
 
-  const Register RconstMethod = AARCH64_ONLY(R10) NOT_AARCH64(R3);
+  const Register RconstMethod = R3;
 
-#ifdef AARCH64
-  const Register RmaxStack = R11;
-  const Register RlocalsBase = R12;
-#endif // AARCH64
 
   __ ldr(RconstMethod, Address(Rmethod, Method::const_offset()));
 
@@ -1342,48 +1096,10 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
 
   __ sub(R3, R3, R2); // number of additional locals
 
-#ifdef AARCH64
-  // setup RmaxStack
-  __ ldrh(RmaxStack, Address(RconstMethod, ConstMethod::max_stack_offset()));
-  // We have to add extra reserved slots to max_stack. There are 3 users of the extra slots,
-  // none of which are at the same time, so we just need to make sure there is enough room
-  // for the biggest user:
-  //   -reserved slot for exception handler
-  //   -reserved slots for JSR292. Method::extra_stack_entries() is the size.
-  //   -3 reserved slots so get_method_counters() can save some registers before call_VM().
-  __ add(RmaxStack, RmaxStack, MAX2(3, Method::extra_stack_entries()));
-#endif // AARCH64
 
   // see if we've got enough room on the stack for locals plus overhead.
   generate_stack_overflow_check();
 
-#ifdef AARCH64
-
-  // allocate space for locals
-  {
-    __ sub(RlocalsBase, Rparams, AsmOperand(R3, lsl, Interpreter::logStackElementSize));
-    __ align_reg(SP, RlocalsBase, StackAlignmentInBytes);
-  }
-
-  // explicitly initialize locals
-  {
-    Label zero_loop, done;
-    __ cbz(R3, done);
-
-    __ tbz(R3, 0, zero_loop);
-    __ subs(R3, R3, 1);
-    __ str(ZR, Address(RlocalsBase, wordSize, post_indexed));
-    __ b(done, eq);
-
-    __ bind(zero_loop);
-    __ subs(R3, R3, 2);
-    __ stp(ZR, ZR, Address(RlocalsBase, 2*wordSize, post_indexed));
-    __ b(zero_loop, ne);
-
-    __ bind(done);
-  }
-
-#else
   // allocate space for locals
   // explicitly initialize locals
 
@@ -1409,7 +1125,6 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
   __ push(R0, ge);
 
   __ b(loop, gt);
-#endif // AARCH64
 
   // initialize fixed part of activation frame
   generate_fixed_frame(false);
@@ -1524,11 +1239,9 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
   Interpreter::_rethrow_exception_entry = __ pc();
   // Rexception_obj: exception
 
-#ifndef AARCH64
   // Clear interpreter_frame_last_sp.
   __ mov(Rtemp, 0);
   __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
-#endif // !AARCH64
 
 #if R9_IS_SCRATCHED
   __ restore_method();
@@ -1537,9 +1250,6 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
   __ restore_dispatch();
   __ restore_locals();
 
-#ifdef AARCH64
-  __ restore_sp_after_call(Rtemp);
-#endif // AARCH64
 
   // Entry point for exceptions thrown within interpreter code
   Interpreter::_throw_exception_entry = __ pc();
@@ -1576,9 +1286,6 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
   //
    Interpreter::_remove_activation_preserving_args_entry = __ pc();
 
-#ifdef AARCH64
-  __ restore_sp_after_call(Rtemp); // restore SP to extended SP
-#endif // AARCH64
 
   __ empty_expression_stack();
 
@@ -1605,9 +1312,6 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
     __ ldr(R0, Address(FP, frame::return_addr_offset * wordSize));
     __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), R0);
     __ cbnz_32(R0, caller_not_deoptimized);
-#ifdef AARCH64
-    __ NOT_TESTED();
-#endif
 
     // Compute size of arguments for saving when returning to deoptimized caller
     __ restore_method();
@@ -1642,7 +1346,6 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
                        /* install_monitor_exception */ false,
                        /* notify_jvmdi */ false);
 
-#ifndef AARCH64
   // Finish with popframe handling
   // A previous I2C followed by a deoptimization might have moved the
   // outgoing arguments further up the stack. PopFrame expects the
@@ -1661,17 +1364,11 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
   __ mov(R0, Rthread);
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), R0, R1, R2);
   __ reset_last_Java_frame(Rtemp);
-#endif // !AARCH64
 
-#ifdef AARCH64
-  __ restore_sp_after_call(Rtemp);
-  __ restore_stack_top();
-#else
   // Restore the last_sp and null it out
   __ ldr(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
   __ mov(Rtemp, (int)NULL_WORD);
   __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
-#endif // AARCH64
 
   __ restore_bcp();
   __ restore_dispatch();
@@ -1748,9 +1445,6 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
 address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {
   address entry = __ pc();
 
-#ifdef AARCH64
-  __ restore_sp_after_call(Rtemp); // restore SP to extended SP
-#endif // AARCH64
 
   __ restore_bcp();
   __ restore_dispatch();
@@ -1771,13 +1465,11 @@ address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state
                        false, /* install_monitor_exception */
                        true); /* notify_jvmdi */
 
-#ifndef AARCH64
   // According to interpreter calling conventions, result is returned in R0/R1,
   // so ftos (S0) and dtos (D0) are moved to R0/R1.
   // This conversion should be done after remove_activation, as it uses
   // push(state) & pop(state) to preserve return value.
   __ convert_tos_to_retval(state);
-#endif // !AARCH64
   __ ret();
 
   return entry;
@@ -1800,7 +1492,7 @@ void TemplateInterpreterGenerator::set_vtos_entry_points (Template* t, address&
 
   lep = __ pc(); __ push(ltos); __ b(L);
 
-  if (AARCH64_ONLY(true) NOT_AARCH64(VerifyOops)) {  // can't share atos entry with itos on AArch64 or if VerifyOops
+  if (VerifyOops) {  // can't share atos entry with itos if VerifyOops
     aep = __ pc(); __ push(atos); __ b(L);
   } else {
     aep = __ pc();              // fall through
@@ -1828,11 +1520,7 @@ address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
 
   // pass tosca registers as arguments
   __ mov(R2, R0_tos);
-#ifdef AARCH64
-  __ mov(R3, ZR);
-#else
   __ mov(R3, R1_tos_hi);
-#endif // AARCH64
   __ mov(R1, LR);       // save return address
 
   // call tracer
diff --git a/src/hotspot/cpu/arm/templateTable_arm.cpp b/src/hotspot/cpu/arm/templateTable_arm.cpp
index 3adf9fbd88c..124c17353f9 100644
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp
@@ -58,9 +58,7 @@ static inline Address iaddress(int n)            {
 }
 
 static inline Address laddress(int n)            { return iaddress(n + 1); }
-#ifndef AARCH64
 static inline Address haddress(int n)            { return iaddress(n + 0); }
-#endif // !AARCH64
 
 static inline Address faddress(int n)            { return iaddress(n); }
 static inline Address daddress(int n)            { return laddress(n); }
@@ -72,12 +70,7 @@ void TemplateTable::get_local_base_addr(Register r, Register index) {
 }
 
 Address TemplateTable::load_iaddress(Register index, Register scratch) {
-#ifdef AARCH64
-  get_local_base_addr(scratch, index);
-  return Address(scratch);
-#else
   return Address(Rlocals, index, lsl, Interpreter::logStackElementSize, basic_offset, sub_offset);
-#endif // AARCH64
 }
 
 Address TemplateTable::load_aaddress(Register index, Register scratch) {
@@ -113,45 +106,29 @@ static inline Address at_tos_p2() {
 }
 
 
-// 32-bit ARM:
 // Loads double/long local into R0_tos_lo/R1_tos_hi with two
 // separate ldr instructions (supports nonadjacent values).
 // Used for longs in all modes, and for doubles in SOFTFP mode.
-//
-// AArch64: loads long local into R0_tos.
-//
 void TemplateTable::load_category2_local(Register Rlocal_index, Register tmp) {
   const Register Rlocal_base = tmp;
   assert_different_registers(Rlocal_index, tmp);
 
   get_local_base_addr(Rlocal_base, Rlocal_index);
-#ifdef AARCH64
-  __ ldr(R0_tos, Address(Rlocal_base, Interpreter::local_offset_in_bytes(1)));
-#else
   __ ldr(R0_tos_lo, Address(Rlocal_base, Interpreter::local_offset_in_bytes(1)));
   __ ldr(R1_tos_hi, Address(Rlocal_base, Interpreter::local_offset_in_bytes(0)));
-#endif // AARCH64
 }
 
 
-// 32-bit ARM:
 // Stores R0_tos_lo/R1_tos_hi to double/long local with two
 // separate str instructions (supports nonadjacent values).
 // Used for longs in all modes, and for doubles in SOFTFP mode
-//
-// AArch64: stores R0_tos to long local.
-//
 void TemplateTable::store_category2_local(Register Rlocal_index, Register tmp) {
   const Register Rlocal_base = tmp;
   assert_different_registers(Rlocal_index, tmp);
 
   get_local_base_addr(Rlocal_base, Rlocal_index);
-#ifdef AARCH64
-  __ str(R0_tos, Address(Rlocal_base, Interpreter::local_offset_in_bytes(1)));
-#else
   __ str(R0_tos_lo, Address(Rlocal_base, Interpreter::local_offset_in_bytes(1)));
   __ str(R1_tos_hi, Address(Rlocal_base, Interpreter::local_offset_in_bytes(0)));
-#endif // AARCH64
 }
 
 // Returns address of Java array element using temp register as address base.
@@ -180,7 +157,7 @@ AsmCondition convNegCond(TemplateTable::Condition cc) {
 // Miscelaneous helper routines
 
 // Store an oop (or NULL) at the address described by obj.
-// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Blows all volatile registers (R0-R3, Rtemp, LR).
 // Also destroys new_val and obj.base().
 static void do_oop_store(InterpreterMacroAssembler* _masm,
                          Address obj,
@@ -212,7 +189,7 @@ Address TemplateTable::at_bcp(int offset) {
 }
 
 
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR.
+// Blows volatile registers R0-R3, Rtemp, LR.
 void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
                                    Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
                                    int byte_no) {
@@ -315,22 +292,12 @@ void TemplateTable::lconst(int value) {
   transition(vtos, ltos);
   assert((value == 0) || (value == 1), "unexpected long constant");
   __ mov(R0_tos, value);
-#ifndef AARCH64
   __ mov(R1_tos_hi, 0);
-#endif // !AARCH64
 }
 
 
 void TemplateTable::fconst(int value) {
   transition(vtos, ftos);
-#ifdef AARCH64
-  switch(value) {
-  case 0:   __ fmov_sw(S0_tos, ZR);    break;
-  case 1:   __ fmov_s (S0_tos, 0x70);  break;
-  case 2:   __ fmov_s (S0_tos, 0x00);  break;
-  default:  ShouldNotReachHere();      break;
-  }
-#else
   const int zero = 0;         // 0.0f
   const int one = 0x3f800000; // 1.0f
   const int two = 0x40000000; // 2.0f
@@ -345,19 +312,11 @@ void TemplateTable::fconst(int value) {
 #ifndef __SOFTFP__
   __ fmsr(S0_tos, R0_tos);
 #endif // !__SOFTFP__
-#endif // AARCH64
 }
 
 
 void TemplateTable::dconst(int value) {
   transition(vtos, dtos);
-#ifdef AARCH64
-  switch(value) {
-  case 0:   __ fmov_dx(D0_tos, ZR);    break;
-  case 1:   __ fmov_d (D0_tos, 0x70);  break;
-  default:  ShouldNotReachHere();      break;
-  }
-#else
   const int one_lo = 0;            // low part of 1.0
   const int one_hi = 0x3ff00000;   // high part of 1.0
 
@@ -378,7 +337,6 @@ void TemplateTable::dconst(int value) {
   } else {
     ShouldNotReachHere();
   }
-#endif // AARCH64
 }
 
 
@@ -417,25 +375,15 @@ void TemplateTable::ldc(bool wide) {
 
   // get const type
   __ add(Rtemp, Rtags, tags_offset);
-#ifdef AARCH64
-  __ add(Rtemp, Rtemp, Rindex);
-  __ ldarb(RtagType, Rtemp);  // TODO-AARCH64 figure out if barrier is needed here, or control dependency is enough
-#else
   __ ldrb(RtagType, Address(Rtemp, Rindex));
   volatile_barrier(MacroAssembler::LoadLoad, Rtemp);
-#endif // AARCH64
 
   // unresolved class - get the resolved class
   __ cmp(RtagType, JVM_CONSTANT_UnresolvedClass);
 
   // unresolved class in error (resolution failed) - call into runtime
   // so that the same error from first resolution attempt is thrown.
-#ifdef AARCH64
-  __ mov(Rtemp, JVM_CONSTANT_UnresolvedClassInError); // this constant does not fit into 5-bit immediate constraint
-  __ cond_cmp(RtagType, Rtemp, ne);
-#else
   __ cond_cmp(RtagType, JVM_CONSTANT_UnresolvedClassInError, ne);
-#endif // AARCH64
 
   // resolved class - need to call vm to get java mirror of the class
   __ cond_cmp(RtagType, JVM_CONSTANT_Class, ne);
@@ -545,12 +493,8 @@ void TemplateTable::ldc2_w() {
 
   __ cmp(Rtemp, JVM_CONSTANT_Long);
   __ b(Condy, ne);
-#ifdef AARCH64
-  __ ldr(R0_tos, Address(Rbase, base_offset));
-#else
   __ ldr(R0_tos_lo, Address(Rbase, base_offset + 0 * wordSize));
   __ ldr(R1_tos_hi, Address(Rbase, base_offset + 1 * wordSize));
-#endif // AARCH64
   __ push(ltos);
   __ b(exit);
 
@@ -576,12 +520,8 @@ void TemplateTable::condy_helper(Label& Done)
   // VMr2 = flags = (tos, off) using format of CPCE::_flags
   __ mov(off, flags);
 
-#ifdef AARCH64
-  __ andr(off, off, (unsigned)ConstantPoolCacheEntry::field_index_mask);
-#else
   __ logical_shift_left( off, off, 32 - ConstantPoolCacheEntry::field_index_bits);
   __ logical_shift_right(off, off, 32 - ConstantPoolCacheEntry::field_index_bits);
-#endif
 
   const Address field(obj, off);
 
@@ -641,13 +581,9 @@ void TemplateTable::condy_helper(Label& Done)
         __ cond_cmp(flags, dtos, ne);
         __ b(notLongDouble, ne);
 
-#ifdef AARCH64
-        __ ldr(R0_tos, field);
-#else
         __ add(rtmp, obj, wordSize);
         __ ldr(R0_tos_lo, Address(obj, off));
         __ ldr(R1_tos_hi, Address(rtmp, off));
-#endif
         __ push(ltos);
         __ b(Done);
 
@@ -895,13 +831,9 @@ void TemplateTable::laload() {
 
   index_check(Rarray, Rindex);
 
-#ifdef AARCH64
-  __ ldr(R0_tos, get_array_elem_addr(T_LONG, Rarray, Rindex, Rtemp));
-#else
   __ add(Rtemp, Rarray, AsmOperand(Rindex, lsl, LogBytesPerLong));
   __ add(Rtemp, Rtemp, arrayOopDesc::base_offset_in_bytes(T_LONG));
   __ ldmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
-#endif // AARCH64
 }
 
 
@@ -1006,12 +938,8 @@ void TemplateTable::iload(int n) {
 
 void TemplateTable::lload(int n) {
   transition(vtos, ltos);
-#ifdef AARCH64
-  __ ldr(R0_tos, laddress(n));
-#else
   __ ldr(R0_tos_lo, laddress(n));
   __ ldr(R1_tos_hi, haddress(n));
-#endif // AARCH64
 }
 
 
@@ -1099,14 +1027,8 @@ void TemplateTable::aload_0_internal(RewriteControl rc) {
     assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
 
     __ cmp(next_bytecode, Bytecodes::_fast_fgetfield);
-#ifdef AARCH64
-    __ mov(Rtemp, Bytecodes::_fast_faccess_0);
-    __ mov(target_bytecode, Bytecodes::_fast_aload_0);
-    __ mov(target_bytecode, Rtemp, eq);
-#else
     __ mov(target_bytecode, Bytecodes::_fast_faccess_0, eq);
     __ mov(target_bytecode, Bytecodes::_fast_aload_0, ne);
-#endif // AARCH64
 
     // rewrite
     __ bind(rewrite);
@@ -1192,11 +1114,7 @@ void TemplateTable::wide_lstore() {
   const Register Rlocal_index = R2_tmp;
   const Register Rlocal_base = R3_tmp;
 
-#ifdef AARCH64
-  __ pop_l(R0_tos);
-#else
   __ pop_l(R0_tos_lo, R1_tos_hi);
-#endif // AARCH64
 
   locals_index_wide(Rlocal_index);
   store_category2_local(Rlocal_index, R3_tmp);
@@ -1245,13 +1163,9 @@ void TemplateTable::lastore() {
   __ pop_i(Rindex);
   index_check(Rarray, Rindex);
 
-#ifdef AARCH64
-  __ str(R0_tos, get_array_elem_addr(T_LONG, Rarray, Rindex, Rtemp));
-#else
   __ add(Rtemp, Rarray, AsmOperand(Rindex, lsl, LogBytesPerLong));
   __ add(Rtemp, Rtemp, arrayOopDesc::base_offset_in_bytes(T_LONG));
   __ stmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
-#endif // AARCH64
 }
 
 
@@ -1404,12 +1318,8 @@ void TemplateTable::istore(int n) {
 
 void TemplateTable::lstore(int n) {
   transition(ltos, vtos);
-#ifdef AARCH64
-  __ str(R0_tos, laddress(n));
-#else
   __ str(R0_tos_lo, laddress(n));
   __ str(R1_tos_hi, haddress(n));
-#endif // AARCH64
 }
 
 
@@ -1566,15 +1476,9 @@ void TemplateTable::iop2(Operation op) {
     case _and : __ and_32 (R0_tos, arg1, arg2); break;
     case _or  : __ orr_32 (R0_tos, arg1, arg2); break;
     case _xor : __ eor_32 (R0_tos, arg1, arg2); break;
-#ifdef AARCH64
-    case shl  : __ lslv_w (R0_tos, arg1, arg2); break;
-    case shr  : __ asrv_w (R0_tos, arg1, arg2); break;
-    case ushr : __ lsrv_w (R0_tos, arg1, arg2); break;
-#else
     case shl  : __ andr(arg2, arg2, 0x1f); __ mov (R0_tos, AsmOperand(arg1, lsl, arg2)); break;
     case shr  : __ andr(arg2, arg2, 0x1f); __ mov (R0_tos, AsmOperand(arg1, asr, arg2)); break;
     case ushr : __ andr(arg2, arg2, 0x1f); __ mov (R0_tos, AsmOperand(arg1, lsr, arg2)); break;
-#endif // AARCH64
     default   : ShouldNotReachHere();
   }
 }
@@ -1582,20 +1486,6 @@ void TemplateTable::iop2(Operation op) {
 
 void TemplateTable::lop2(Operation op) {
   transition(ltos, ltos);
-#ifdef AARCH64
-  const Register arg1 = R1_tmp;
-  const Register arg2 = R0_tos;
-
-  __ pop_l(arg1);
-  switch (op) {
-    case add  : __ add (R0_tos, arg1, arg2); break;
-    case sub  : __ sub (R0_tos, arg1, arg2); break;
-    case _and : __ andr(R0_tos, arg1, arg2); break;
-    case _or  : __ orr (R0_tos, arg1, arg2); break;
-    case _xor : __ eor (R0_tos, arg1, arg2); break;
-    default   : ShouldNotReachHere();
-  }
-#else
   const Register arg1_lo = R2_tmp;
   const Register arg1_hi = R3_tmp;
   const Register arg2_lo = R0_tos_lo;
@@ -1610,20 +1500,11 @@ void TemplateTable::lop2(Operation op) {
     case _xor: __ eor (R0_tos_lo, arg1_lo, arg2_lo); __ eor (R1_tos_hi, arg1_hi, arg2_hi); break;
     default : ShouldNotReachHere();
   }
-#endif // AARCH64
 }
 
 
 void TemplateTable::idiv() {
   transition(itos, itos);
-#ifdef AARCH64
-  const Register divisor = R0_tos;
-  const Register dividend = R1_tmp;
-
-  __ cbz_w(divisor, Interpreter::_throw_ArithmeticException_entry);
-  __ pop_i(dividend);
-  __ sdiv_w(R0_tos, dividend, divisor);
-#else
   __ mov(R2, R0_tos);
   __ pop_i(R0);
   // R0 - dividend
@@ -1631,41 +1512,22 @@ void TemplateTable::idiv() {
   __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::none);
   // R1 - result
   __ mov(R0_tos, R1);
-#endif // AARCH64
 }
 
 
 void TemplateTable::irem() {
   transition(itos, itos);
-#ifdef AARCH64
-  const Register divisor = R0_tos;
-  const Register dividend = R1_tmp;
-  const Register quotient = R2_tmp;
-
-  __ cbz_w(divisor, Interpreter::_throw_ArithmeticException_entry);
-  __ pop_i(dividend);
-  __ sdiv_w(quotient, dividend, divisor);
-  __ msub_w(R0_tos, divisor, quotient, dividend);
-#else
   __ mov(R2, R0_tos);
   __ pop_i(R0);
   // R0 - dividend
   // R2 - divisor
   __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::none);
   // R0 - remainder
-#endif // AARCH64
 }
 
 
 void TemplateTable::lmul() {
   transition(ltos, ltos);
-#ifdef AARCH64
-  const Register arg1 = R0_tos;
-  const Register arg2 = R1_tmp;
-
-  __ pop_l(arg2);
-  __ mul(R0_tos, arg1, arg2);
-#else
   const Register arg1_lo = R0_tos_lo;
   const Register arg1_hi = R1_tos_hi;
   const Register arg2_lo = R2_tmp;
@@ -1674,20 +1536,11 @@ void TemplateTable::lmul() {
   __ pop_l(arg2_lo, arg2_hi);
 
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lmul), arg1_lo, arg1_hi, arg2_lo, arg2_hi);
-#endif // AARCH64
 }
 
 
 void TemplateTable::ldiv() {
   transition(ltos, ltos);
-#ifdef AARCH64
-  const Register divisor = R0_tos;
-  const Register dividend = R1_tmp;
-
-  __ cbz(divisor, Interpreter::_throw_ArithmeticException_entry);
-  __ pop_l(dividend);
-  __ sdiv(R0_tos, dividend, divisor);
-#else
   const Register x_lo = R2_tmp;
   const Register x_hi = R3_tmp;
   const Register y_lo = R0_tos_lo;
@@ -1699,22 +1552,11 @@ void TemplateTable::ldiv() {
   __ orrs(Rtemp, y_lo, y_hi);
   __ call(Interpreter::_throw_ArithmeticException_entry, relocInfo::none, eq);
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), y_lo, y_hi, x_lo, x_hi);
-#endif // AARCH64
 }
 
 
 void TemplateTable::lrem() {
   transition(ltos, ltos);
-#ifdef AARCH64
-  const Register divisor = R0_tos;
-  const Register dividend = R1_tmp;
-  const Register quotient = R2_tmp;
-
-  __ cbz(divisor, Interpreter::_throw_ArithmeticException_entry);
-  __ pop_l(dividend);
-  __ sdiv(quotient, dividend, divisor);
-  __ msub(R0_tos, divisor, quotient, dividend);
-#else
   const Register x_lo = R2_tmp;
   const Register x_hi = R3_tmp;
   const Register y_lo = R0_tos_lo;
@@ -1726,18 +1568,11 @@ void TemplateTable::lrem() {
   __ orrs(Rtemp, y_lo, y_hi);
   __ call(Interpreter::_throw_ArithmeticException_entry, relocInfo::none, eq);
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), y_lo, y_hi, x_lo, x_hi);
-#endif // AARCH64
 }
 
 
 void TemplateTable::lshl() {
   transition(itos, ltos);
-#ifdef AARCH64
-  const Register val = R1_tmp;
-  const Register shift_cnt = R0_tos;
-  __ pop_l(val);
-  __ lslv(R0_tos, val, shift_cnt);
-#else
   const Register shift_cnt = R4_tmp;
   const Register val_lo = R2_tmp;
   const Register val_hi = R3_tmp;
@@ -1745,18 +1580,11 @@ void TemplateTable::lshl() {
   __ pop_l(val_lo, val_hi);
   __ andr(shift_cnt, R0_tos, 63);
   __ long_shift(R0_tos_lo, R1_tos_hi, val_lo, val_hi, lsl, shift_cnt);
-#endif // AARCH64
 }
 
 
 void TemplateTable::lshr() {
   transition(itos, ltos);
-#ifdef AARCH64
-  const Register val = R1_tmp;
-  const Register shift_cnt = R0_tos;
-  __ pop_l(val);
-  __ asrv(R0_tos, val, shift_cnt);
-#else
   const Register shift_cnt = R4_tmp;
   const Register val_lo = R2_tmp;
   const Register val_hi = R3_tmp;
@@ -1764,18 +1592,11 @@ void TemplateTable::lshr() {
   __ pop_l(val_lo, val_hi);
   __ andr(shift_cnt, R0_tos, 63);
   __ long_shift(R0_tos_lo, R1_tos_hi, val_lo, val_hi, asr, shift_cnt);
-#endif // AARCH64
 }
 
 
 void TemplateTable::lushr() {
   transition(itos, ltos);
-#ifdef AARCH64
-  const Register val = R1_tmp;
-  const Register shift_cnt = R0_tos;
-  __ pop_l(val);
-  __ lsrv(R0_tos, val, shift_cnt);
-#else
   const Register shift_cnt = R4_tmp;
   const Register val_lo = R2_tmp;
   const Register val_hi = R3_tmp;
@@ -1783,7 +1604,6 @@ void TemplateTable::lushr() {
   __ pop_l(val_lo, val_hi);
   __ andr(shift_cnt, R0_tos, 63);
   __ long_shift(R0_tos_lo, R1_tos_hi, val_lo, val_hi, lsr, shift_cnt);
-#endif // AARCH64
 }
 
 
@@ -1879,12 +1699,8 @@ void TemplateTable::ineg() {
 
 void TemplateTable::lneg() {
   transition(ltos, ltos);
-#ifdef AARCH64
-  __ neg(R0_tos, R0_tos);
-#else
   __ rsbs(R0_tos_lo, R0_tos_lo, 0);
   __ rsc (R1_tos_hi, R1_tos_hi, 0);
-#endif // AARCH64
 }
 
 
@@ -1994,37 +1810,25 @@ void TemplateTable::convert() {
   // Conversion
   switch (bytecode()) {
     case Bytecodes::_i2l:
-#ifdef AARCH64
-      __ sign_extend(R0_tos, R0_tos, 32);
-#else
       __ mov(R1_tos_hi, AsmOperand(R0_tos, asr, BitsPerWord-1));
-#endif // AARCH64
       break;
 
     case Bytecodes::_i2f:
-#ifdef AARCH64
-      __ scvtf_sw(S0_tos, R0_tos);
-#else
 #ifdef __SOFTFP__
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_i2f), R0_tos);
 #else
       __ fmsr(S0_tmp, R0_tos);
       __ fsitos(S0_tos, S0_tmp);
 #endif // __SOFTFP__
-#endif // AARCH64
       break;
 
     case Bytecodes::_i2d:
-#ifdef AARCH64
-      __ scvtf_dw(D0_tos, R0_tos);
-#else
 #ifdef __SOFTFP__
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_i2d), R0_tos);
 #else
       __ fmsr(S0_tmp, R0_tos);
       __ fsitod(D0_tos, S0_tmp);
 #endif // __SOFTFP__
-#endif // AARCH64
       break;
 
     case Bytecodes::_i2b:
@@ -2044,49 +1848,33 @@ void TemplateTable::convert() {
       break;
 
     case Bytecodes::_l2f:
-#ifdef AARCH64
-      __ scvtf_sx(S0_tos, R0_tos);
-#else
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::l2f), R0_tos_lo, R1_tos_hi);
 #if !defined(__SOFTFP__) && !defined(__ABI_HARD__)
       __ fmsr(S0_tos, R0);
 #endif // !__SOFTFP__ && !__ABI_HARD__
-#endif // AARCH64
       break;
 
     case Bytecodes::_l2d:
-#ifdef AARCH64
-      __ scvtf_dx(D0_tos, R0_tos);
-#else
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::l2d), R0_tos_lo, R1_tos_hi);
 #if !defined(__SOFTFP__) && !defined(__ABI_HARD__)
       __ fmdrr(D0_tos, R0, R1);
 #endif // !__SOFTFP__ && !__ABI_HARD__
-#endif // AARCH64
       break;
 
     case Bytecodes::_f2i:
-#ifdef AARCH64
-      __ fcvtzs_ws(R0_tos, S0_tos);
-#else
 #ifndef __SOFTFP__
       __ ftosizs(S0_tos, S0_tos);
       __ fmrs(R0_tos, S0_tos);
 #else
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), R0_tos);
 #endif // !__SOFTFP__
-#endif // AARCH64
       break;
 
     case Bytecodes::_f2l:
-#ifdef AARCH64
-      __ fcvtzs_xs(R0_tos, S0_tos);
-#else
 #ifndef __SOFTFP__
       __ fmrs(R0_tos, S0_tos);
 #endif // !__SOFTFP__
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), R0_tos);
-#endif // AARCH64
       break;
 
     case Bytecodes::_f2d:
@@ -2098,27 +1886,19 @@ void TemplateTable::convert() {
       break;
 
     case Bytecodes::_d2i:
-#ifdef AARCH64
-      __ fcvtzs_wd(R0_tos, D0_tos);
-#else
 #ifndef __SOFTFP__
       __ ftosizd(Stemp, D0);
       __ fmrs(R0, Stemp);
 #else
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), R0_tos_lo, R1_tos_hi);
 #endif // !__SOFTFP__
-#endif // AARCH64
       break;
 
     case Bytecodes::_d2l:
-#ifdef AARCH64
-      __ fcvtzs_xd(R0_tos, D0_tos);
-#else
 #ifndef __SOFTFP__
       __ fmrrd(R0_tos_lo, R1_tos_hi, D0_tos);
 #endif // !__SOFTFP__
       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), R0_tos_lo, R1_tos_hi);
-#endif // AARCH64
       break;
 
     case Bytecodes::_d2f:
@@ -2137,16 +1917,6 @@ void TemplateTable::convert() {
 
 void TemplateTable::lcmp() {
   transition(ltos, itos);
-#ifdef AARCH64
-  const Register arg1 = R1_tmp;
-  const Register arg2 = R0_tos;
-
-  __ pop_l(arg1);
-
-  __ cmp(arg1, arg2);
-  __ cset(R0_tos, gt);               // 1 if '>', else 0
-  __ csinv(R0_tos, R0_tos, ZR, ge);  // previous value if '>=', else -1
-#else
   const Register arg1_lo = R2_tmp;
   const Register arg1_hi = R3_tmp;
   const Register arg2_lo = R0_tos_lo;
@@ -2169,33 +1939,12 @@ void TemplateTable::lcmp() {
   __ mov (res, 1, hi);
   __ bind(done);
   __ mov (R0_tos, res);
-#endif // AARCH64
 }
 
 
 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
   assert((unordered_result == 1) || (unordered_result == -1), "invalid unordered result");
 
-#ifdef AARCH64
-  if (is_float) {
-    transition(ftos, itos);
-    __ pop_f(S1_tmp);
-    __ fcmp_s(S1_tmp, S0_tos);
-  } else {
-    transition(dtos, itos);
-    __ pop_d(D1_tmp);
-    __ fcmp_d(D1_tmp, D0_tos);
-  }
-
-  if (unordered_result < 0) {
-    __ cset(R0_tos, gt);               // 1 if '>', else 0
-    __ csinv(R0_tos, R0_tos, ZR, ge);  // previous value if '>=', else -1
-  } else {
-    __ cset(R0_tos, hi);               // 1 if '>' or unordered, else 0
-    __ csinv(R0_tos, R0_tos, ZR, pl);  // previous value if '>=' or unordered, else -1
-  }
-
-#else
 
 #ifdef __SOFTFP__
 
@@ -2261,7 +2010,6 @@ void TemplateTable::float_cmp(bool is_float, int unordered_result) {
   }
   __ mov(R0_tos, 0, eq);         // result ==  0 if equ (Z=1)
 #endif // __SOFTFP__
-#endif // AARCH64
 }
 
 
@@ -2306,12 +2054,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
     __ sub(Rret_addr, Rret_addr, Rtemp);
 
     // Load the next target bytecode into R3_bytecode and advance Rbcp
-#ifdef AARCH64
-    __ add(Rbcp, Rbcp, Rdisp);
-    __ ldrb(R3_bytecode, Address(Rbcp));
-#else
     __ ldrb(R3_bytecode, Address(Rbcp, Rdisp, lsl, 0, pre_indexed));
-#endif // AARCH64
 
     // Push return address
     __ push_i(Rret_addr);
@@ -2323,12 +2066,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
   // Normal (non-jsr) branch handling
 
   // Adjust the bcp by the displacement in Rdisp and load next bytecode.
-#ifdef AARCH64
-  __ add(Rbcp, Rbcp, Rdisp);
-  __ ldrb(R3_bytecode, Address(Rbcp));
-#else
   __ ldrb(R3_bytecode, Address(Rbcp, Rdisp, lsl, 0, pre_indexed));
-#endif // AARCH64
 
   assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
   Label backedge_counter_overflow;
@@ -2343,12 +2081,8 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
     const Register Rcounters = R1_tmp;
 
     // count only if backward branch
-#ifdef AARCH64
-    __ tbz(Rdisp, (BitsPerWord - 1), dispatch); // TODO-AARCH64: check performance of this variant on 32-bit ARM
-#else
     __ tst(Rdisp, Rdisp);
     __ b(dispatch, pl);
-#endif // AARCH64
 
     if (TieredCompilation) {
       Label no_mdo;
@@ -2367,10 +2101,10 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
       }
       __ bind(no_mdo);
       // Increment backedge counter in MethodCounters*
-      // Note Rbumped_taken_count is a callee saved registers for ARM32, but caller saved for ARM64
+      // Note: Rbumped_taken_count is a callee-saved register on ARM32
       __ get_method_counters(Rmethod, Rcounters, dispatch, true /*saveRegs*/,
                              Rdisp, R3_bytecode,
-                             AARCH64_ONLY(Rbumped_taken_count) NOT_AARCH64(noreg));
+                             noreg);
       const Address mask(Rcounters, in_bytes(MethodCounters::backedge_mask_offset()));
       __ increment_mask_and_jump(Address(Rcounters, be_offset), increment, mask,
                                  Rcnt, R4_tmp, eq, &backedge_counter_overflow);
@@ -2378,17 +2112,13 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
       // Increment backedge counter in MethodCounters*
       __ get_method_counters(Rmethod, Rcounters, dispatch, true /*saveRegs*/,
                              Rdisp, R3_bytecode,
-                             AARCH64_ONLY(Rbumped_taken_count) NOT_AARCH64(noreg));
+                             noreg);
       __ ldr_u32(Rtemp, Address(Rcounters, be_offset));           // load backedge counter
       __ add(Rtemp, Rtemp, InvocationCounter::count_increment);   // increment counter
       __ str_32(Rtemp, Address(Rcounters, be_offset));            // store counter
 
       __ ldr_u32(Rcnt, Address(Rcounters, inv_offset));           // load invocation counter
-#ifdef AARCH64
-      __ andr(Rcnt, Rcnt, (unsigned int)InvocationCounter::count_mask_value);  // and the status bits
-#else
       __ bic(Rcnt, Rcnt, ~InvocationCounter::count_mask_value);  // and the status bits
-#endif // AARCH64
       __ add(Rcnt, Rcnt, Rtemp);                                 // add both counters
 
       if (ProfileInterpreter) {
@@ -2415,13 +2145,9 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
           // sure the overflow function is called only once every overflow_frequency.
           const int overflow_frequency = 1024;
 
-#ifdef AARCH64
-          __ tst(Rbumped_taken_count, (unsigned)(overflow_frequency-1));
-#else
           // was '__ andrs(...,overflow_frequency-1)', testing if lowest 10 bits are 0
           assert(overflow_frequency == (1 << 10),"shift by 22 not correct for expected frequency");
           __ movs(Rbumped_taken_count, AsmOperand(Rbumped_taken_count, lsl, 22));
-#endif // AARCH64
 
           __ b(backedge_counter_overflow, eq);
         }
@@ -2490,13 +2216,8 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
       __ ldr(R1_tmp, Address(Rtmp_save0, nmethod::osr_entry_point_offset()));
       __ ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
 
-#ifdef AARCH64
-      __ ldp(FP, LR, Address(FP));
-      __ mov(SP, Rtemp);
-#else
       __ ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
       __ bic(SP, Rtemp, StackAlignmentInBytes - 1);     // Remove frame and align stack
-#endif // AARCH64
 
       __ jump(R1_tmp);
     }
@@ -2508,19 +2229,8 @@ void TemplateTable::if_0cmp(Condition cc) {
   transition(itos, vtos);
   // assume branch is more often taken than not (loops use backward branches)
   Label not_taken;
-#ifdef AARCH64
-  if (cc == equal) {
-    __ cbnz_w(R0_tos, not_taken);
-  } else if (cc == not_equal) {
-    __ cbz_w(R0_tos, not_taken);
-  } else {
-    __ cmp_32(R0_tos, 0);
-    __ b(not_taken, convNegCond(cc));
-  }
-#else
   __ cmp_32(R0_tos, 0);
   __ b(not_taken, convNegCond(cc));
-#endif // AARCH64
   branch(false, false);
   __ bind(not_taken);
   __ profile_not_taken_branch(R0_tmp);
@@ -2606,9 +2316,7 @@ void TemplateTable::tableswitch() {
   transition(itos, vtos);
 
   const Register Rindex  = R0_tos;
-#ifndef AARCH64
   const Register Rtemp2  = R1_tmp;
-#endif // !AARCH64
   const Register Rabcp   = R2_tmp;  // aligned bcp
   const Register Rlow    = R3_tmp;
   const Register Rhigh   = R4_tmp;
@@ -2619,38 +2327,13 @@ void TemplateTable::tableswitch() {
   __ align_reg(Rabcp, Rtemp, BytesPerInt);
 
   // load lo & hi
-#ifdef AARCH64
-  __ ldp_w(Rlow, Rhigh, Address(Rabcp, 2*BytesPerInt, post_indexed));
-#else
   __ ldmia(Rabcp, RegisterSet(Rlow) | RegisterSet(Rhigh), writeback);
-#endif // AARCH64
   __ byteswap_u32(Rlow, Rtemp, Rtemp2);
   __ byteswap_u32(Rhigh, Rtemp, Rtemp2);
 
   // compare index with high bound
   __ cmp_32(Rhigh, Rindex);
 
-#ifdef AARCH64
-  Label default_case, do_dispatch;
-  __ ccmp_w(Rindex, Rlow, Assembler::flags_for_condition(lt), ge);
-  __ b(default_case, lt);
-
-  __ sub_w(Rindex, Rindex, Rlow);
-  __ ldr_s32(Roffset, Address(Rabcp, Rindex, ex_sxtw, LogBytesPerInt));
-  if(ProfileInterpreter) {
-    __ sxtw(Rindex, Rindex);
-    __ profile_switch_case(Rabcp, Rindex, Rtemp2, R0_tmp);
-  }
-  __ b(do_dispatch);
-
-  __ bind(default_case);
-  __ ldr_s32(Roffset, Address(Rabcp, -3 * BytesPerInt));
-  if(ProfileInterpreter) {
-    __ profile_switch_default(R0_tmp);
-  }
-
-  __ bind(do_dispatch);
-#else
 
   // if Rindex <= Rhigh then calculate index in table (Rindex - Rlow)
   __ subs(Rindex, Rindex, Rlow, ge);
@@ -2676,17 +2359,11 @@ void TemplateTable::tableswitch() {
     __ ldr(Roffset, Address(Rabcp, -3 * BytesPerInt), lt);
     __ ldr(Roffset, Address(Rabcp, Rindex, lsl, LogBytesPerInt), ge);
   }
-#endif // AARCH64
 
   __ byteswap_u32(Roffset, Rtemp, Rtemp2);
 
   // load the next bytecode to R3_bytecode and advance Rbcp
-#ifdef AARCH64
-  __ add(Rbcp, Rbcp, Roffset, ex_sxtw);
-  __ ldrb(R3_bytecode, Address(Rbcp));
-#else
   __ ldrb(R3_bytecode, Address(Rbcp, Roffset, lsl, 0, pre_indexed));
-#endif // AARCH64
   __ dispatch_only(vtos);
 
 }
@@ -2716,32 +2393,19 @@ void TemplateTable::fast_linearswitch() {
   __ align_reg(Rabcp, Rtemp, BytesPerInt);
 
   // load default & counter
-#ifdef AARCH64
-  __ ldp_w(Rdefault, Rcount, Address(Rabcp, 2*BytesPerInt, post_indexed));
-#else
   __ ldmia(Rabcp, RegisterSet(Rdefault) | RegisterSet(Rcount), writeback);
-#endif // AARCH64
   __ byteswap_u32(Rcount, R1_tmp, Rtemp);
 
-#ifdef AARCH64
-  __ cbz_w(Rcount, default_case);
-#else
   __ cmp_32(Rcount, 0);
   __ ldr(Rtemp, Address(Rabcp, 2*BytesPerInt, post_indexed), ne);
   __ b(default_case, eq);
-#endif // AARCH64
 
   // table search
   __ bind(loop);
-#ifdef AARCH64
-  __ ldr_s32(Rtemp, Address(Rabcp, 2*BytesPerInt, post_indexed));
-#endif // AARCH64
   __ cmp_32(Rtemp, Rkey);
   __ b(found, eq);
   __ subs(Rcount, Rcount, 1);
-#ifndef AARCH64
   __ ldr(Rtemp, Address(Rabcp, 2*BytesPerInt, post_indexed), ne);
-#endif // !AARCH64
   __ b(loop, ne);
 
   // default case
@@ -2776,12 +2440,7 @@ void TemplateTable::fast_linearswitch() {
   __ byteswap_u32(Roffset, R1_tmp, Rtemp);
 
   // load the next bytecode to R3_bytecode and advance Rbcp
-#ifdef AARCH64
-  __ add(Rbcp, Rbcp, Roffset, ex_sxtw);
-  __ ldrb(R3_bytecode, Address(Rbcp));
-#else
   __ ldrb(R3_bytecode, Address(Rbcp, Roffset, lsl, 0, pre_indexed));
-#endif // AARCH64
   __ dispatch_only(vtos);
 }
 
@@ -2849,12 +2508,7 @@ void TemplateTable::fast_binaryswitch() {
     // } else {
     //   i = h;
     // }
-#ifdef AARCH64
-    __ add(temp1, array, AsmOperand(h, lsl, 1+LogBytesPerInt));
-    __ ldr_s32(val, Address(temp1));
-#else
     __ ldr_s32(val, Address(array, h, lsl, 1+LogBytesPerInt));
-#endif // AARCH64
     // Convert array[h].match to native byte-ordering before compare
     __ byteswap_u32(val, temp1, temp2);
     __ cmp_32(key, val);
@@ -2870,12 +2524,7 @@ void TemplateTable::fast_binaryswitch() {
   // end of binary search, result index is i (must check again!)
   Label default_case;
   // Convert array[i].match to native byte-ordering before compare
-#ifdef AARCH64
-  __ add(temp1, array, AsmOperand(i, lsl, 1+LogBytesPerInt));
-  __ ldr_s32(val, Address(temp1));
-#else
   __ ldr_s32(val, Address(array, i, lsl, 1+LogBytesPerInt));
-#endif // AARCH64
   __ byteswap_u32(val, temp1, temp2);
   __ cmp_32(key, val);
   __ b(default_case, ne);
@@ -2885,12 +2534,7 @@ void TemplateTable::fast_binaryswitch() {
   __ ldr_s32(offset, Address(temp1, 1*BytesPerInt));
   __ profile_switch_case(R0, i, R1, i);
   __ byteswap_u32(offset, temp1, temp2);
-#ifdef AARCH64
-  __ add(Rbcp, Rbcp, offset, ex_sxtw);
-  __ ldrb(R3_bytecode, Address(Rbcp));
-#else
   __ ldrb(R3_bytecode, Address(Rbcp, offset, lsl, 0, pre_indexed));
-#endif // AARCH64
   __ dispatch_only(vtos);
 
   // default case
@@ -2898,12 +2542,7 @@ void TemplateTable::fast_binaryswitch() {
   __ profile_switch_default(R0);
   __ ldr_s32(offset, Address(array, -2*BytesPerInt));
   __ byteswap_u32(offset, temp1, temp2);
-#ifdef AARCH64
-  __ add(Rbcp, Rbcp, offset, ex_sxtw);
-  __ ldrb(R3_bytecode, Address(Rbcp));
-#else
   __ ldrb(R3_bytecode, Address(Rbcp, offset, lsl, 0, pre_indexed));
-#endif // AARCH64
   __ dispatch_only(vtos);
 }
 
@@ -2935,13 +2574,11 @@ void TemplateTable::_return(TosState state) {
 
   __ interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
 
-#ifndef AARCH64
   // According to interpreter calling conventions, result is returned in R0/R1,
   // so ftos (S0) and dtos (D0) are moved to R0/R1.
   // This conversion should be done after remove_activation, as it uses
   // push(state) & pop(state) to preserve return value.
   __ convert_tos_to_retval(state);
-#endif // !AARCH64
 
   __ ret();
 
@@ -2975,19 +2612,14 @@ void TemplateTable::_return(TosState state) {
 // requirement (1) but miss the volatile-store-volatile-load case.  This final
 // case is placed after volatile-stores although it could just as well go
 // before volatile-loads.
-// TODO-AARCH64: consider removing extra unused parameters
 void TemplateTable::volatile_barrier(MacroAssembler::Membar_mask_bits order_constraint,
                                      Register tmp,
                                      bool preserve_flags,
                                      Register load_tgt) {
-#ifdef AARCH64
-  __ membar(order_constraint);
-#else
   __ membar(order_constraint, tmp, preserve_flags, load_tgt);
-#endif
 }
 
-// Blows all volatile registers: R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR.
+// Blows all volatile registers: R0-R3, Rtemp, LR.
 void TemplateTable::resolve_cache_and_index(int byte_no,
                                             Register Rcache,
                                             Register Rindex,
@@ -3049,7 +2681,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Rcache,
 }
 
 
-// Blows all volatile registers: R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR.
+// Blows all volatile registers: R0-R3, Rtemp, LR.
 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
                                                Register method,
                                                Register itable_index,
@@ -3092,7 +2724,7 @@ void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
 
 
 // The registers cache and index expected to be set before call, and should not be Rtemp.
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR,
+// Blows volatile registers R0-R3, Rtemp, LR,
 // except cache and index registers which are preserved.
 void TemplateTable::jvmti_post_field_access(Register Rcache,
                                             Register Rindex,
@@ -3174,10 +2806,6 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
   // 1) Table switch using add(PC,...) instruction (fast_version)
   // 2) Table switch using ldr(PC,...) instruction
   //
-  // AArch64:
-  // 1) Table switch using adr/add/br instructions (fast_version)
-  // 2) Table switch using adr/ldr/br instructions
-  //
   // First version requires fixed size of code block for each case and
   // can not be used in RewriteBytecodes and VerifyOops
   // modes.
@@ -3191,25 +2819,11 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
 
   // On 32-bit ARM atos and itos cases can be merged only for fast version, because
   // atos requires additional processing in slow version.
-  // On AArch64 atos and itos cannot be merged.
-  bool atos_merged_with_itos = AARCH64_ONLY(false) NOT_AARCH64(fast_version);
+  bool atos_merged_with_itos = fast_version;
 
   assert(number_of_states == 10, "number of tos states should be equal to 9");
 
   __ cmp(Rflags, itos);
-#ifdef AARCH64
-  __ b(Lint, eq);
-
-  if(fast_version) {
-    __ adr(Rtemp, Lbtos);
-    __ add(Rtemp, Rtemp, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize));
-    __ br(Rtemp);
-  } else {
-    __ adr(Rtemp, Ltable);
-    __ ldr(Rtemp, Address::indexed_ptr(Rtemp, Rflags));
-    __ br(Rtemp);
-  }
-#else
   if(atos_merged_with_itos) {
     __ cmp(Rflags, atos, ne);
   }
@@ -3223,13 +2837,11 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
 
   // jump to itos/atos case
   __ b(Lint);
-#endif // AARCH64
 
   // table with addresses for slow version
   if (fast_version) {
     // nothing to do
   } else  {
-    AARCH64_ONLY(__ align(wordSize));
     __ bind(Ltable);
     __ emit_address(Lbtos);
     __ emit_address(Lztos);
@@ -3312,12 +2924,8 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
     assert(ltos == seq++, "ltos has unexpected value");
     FixedSizeCodeBlock ltos_block(_masm, max_block_size, fast_version);
     __ bind(Lltos);
-#ifdef AARCH64
-    __ ldr(R0_tos, Address(Robj, Roffset));
-#else
     __ add(Roffset, Robj, Roffset);
     __ ldmia(Roffset, RegisterSet(R0_tos_lo, R1_tos_hi));
-#endif // AARCH64
     __ push(ltos);
     if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_lgetfield, R0_tmp, Rtemp);
@@ -3347,12 +2955,8 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
     __ bind(Ldtos);
     // doubles and longs are placed on stack in the same way, so
     // we can use push(ltos) to transfer value without using VFP
-#ifdef AARCH64
-    __ ldr(R0_tos, Address(Robj, Roffset));
-#else
     __ add(Rtemp, Robj, Roffset);
     __ ldmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
-#endif // AARCH64
     __ push(ltos);
     if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_dgetfield, R0_tmp, Rtemp);
@@ -3364,7 +2968,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
   {
     assert(atos == seq++, "atos has unexpected value");
 
-    // atos case for AArch64 and slow version on 32-bit ARM
+    // atos case for slow version on 32-bit ARM
     if(!atos_merged_with_itos) {
       __ bind(Latos);
       do_oop_load(_masm, R0_tos, Address(Robj, Roffset));
@@ -3421,7 +3025,7 @@ void TemplateTable::getstatic(int byte_no) {
 
 
 // The registers cache and index expected to be set before call, and should not be R1 or Rtemp.
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR,
+// Blows volatile registers R0-R3, Rtemp, LR,
 // except cache and index registers which are preserved.
 void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rindex, bool is_static) {
   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
@@ -3453,18 +3057,11 @@ void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rindex, bool
 
       __ cmp(Rtemp, ltos);
       __ cond_cmp(Rtemp, dtos, ne);
-#ifdef AARCH64
-      __ mov(Rtemp, Interpreter::expr_offset_in_bytes(2));
-      __ mov(R1, Interpreter::expr_offset_in_bytes(1));
-      __ mov(R1, Rtemp, eq);
-      __ ldr(R1, Address(Rstack_top, R1));
-#else
       // two word value (ltos/dtos)
       __ ldr(R1, Address(SP, Interpreter::expr_offset_in_bytes(2)), eq);
 
       // one word value (not ltos, dtos)
       __ ldr(R1, Address(SP, Interpreter::expr_offset_in_bytes(1)), ne);
-#endif // AARCH64
     }
 
     // cache entry pointer
@@ -3527,39 +3124,22 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
   // 1) Table switch using add(PC,...) instruction (fast_version)
   // 2) Table switch using ldr(PC,...) instruction
   //
-  // AArch64:
-  // 1) Table switch using adr/add/br instructions (fast_version)
-  // 2) Table switch using adr/ldr/br instructions
-  //
   // First version requires fixed size of code block for each case and
   // can not be used in RewriteBytecodes and VerifyOops
   // modes.
 
   // Size of fixed size code block for fast_version (in instructions)
-  const int log_max_block_size = AARCH64_ONLY(is_static ? 2 : 3) NOT_AARCH64(3);
+  const int log_max_block_size = 3;
   const int max_block_size = 1 << log_max_block_size;
 
   // Decide if fast version is enabled
-  bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops && !ZapHighNonSignificantBits;
+  bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops;
 
   assert(number_of_states == 10, "number of tos states should be equal to 9");
 
   // itos case is frequent and is moved outside table switch
   __ cmp(Rflags, itos);
 
-#ifdef AARCH64
-  __ b(Lint, eq);
-
-  if (fast_version) {
-    __ adr(Rtemp, Lbtos);
-    __ add(Rtemp, Rtemp, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize));
-    __ br(Rtemp);
-  } else {
-    __ adr(Rtemp, Ltable);
-    __ ldr(Rtemp, Address::indexed_ptr(Rtemp, Rflags));
-    __ br(Rtemp);
-  }
-#else
   // table switch by type
   if (fast_version) {
     __ add(PC, PC, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize), ne);
@@ -3569,13 +3149,11 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
 
   // jump to itos case
   __ b(Lint);
-#endif // AARCH64
 
   // table with addresses for slow version
   if (fast_version) {
     // nothing to do
   } else  {
-    AARCH64_ONLY(__ align(wordSize));
     __ bind(Ltable);
     __ emit_address(Lbtos);
     __ emit_address(Lztos);
@@ -3663,12 +3241,8 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
     __ bind(Lltos);
     __ pop(ltos);
     if (!is_static) pop_and_check_object(Robj);
-#ifdef AARCH64
-    __ str(R0_tos, Address(Robj, Roffset));
-#else
     __ add(Roffset, Robj, Roffset);
     __ stmia(Roffset, RegisterSet(R0_tos_lo, R1_tos_hi));
-#endif // AARCH64
     if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_lputfield, R0_tmp, Rtemp, true, byte_no);
     }
@@ -3700,12 +3274,8 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
     // we can use pop(ltos) to transfer value without using VFP
     __ pop(ltos);
     if (!is_static) pop_and_check_object(Robj);
-#ifdef AARCH64
-    __ str(R0_tos, Address(Robj, Roffset));
-#else
     __ add(Rtemp, Robj, Roffset);
     __ stmia(Rtemp, RegisterSet(R0_tos_lo, R1_tos_hi));
-#endif // AARCH64
     if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_dputfield, R0_tmp, Rtemp, true, byte_no);
     }
@@ -3790,7 +3360,7 @@ void TemplateTable::jvmti_post_fast_field_mod() {
   Unimplemented();
 }
 
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR,
+// Blows volatile registers R0-R3, Rtemp, LR,
 // but preserves tosca with the given state.
 void TemplateTable::jvmti_post_fast_field_mod(TosState state) {
   if (__ can_post_field_modification()) {
@@ -3859,7 +3429,6 @@ void TemplateTable::fast_storefield(TosState state) {
     Label notVolatile;
     __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
 
-    // TODO-AARCH64 on AArch64, store-release instructions can be used to get rid of this explict barrier
     volatile_barrier(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp);
 
     __ bind(notVolatile);
@@ -3876,11 +3445,6 @@ void TemplateTable::fast_storefield(TosState state) {
     case Bytecodes::_fast_sputfield: // fall through
     case Bytecodes::_fast_cputfield: __ strh(R0_tos, Address(Robj, Roffset)); break;
     case Bytecodes::_fast_iputfield: __ str_32(R0_tos, Address(Robj, Roffset)); break;
-#ifdef AARCH64
-    case Bytecodes::_fast_lputfield: __ str  (R0_tos, Address(Robj, Roffset)); break;
-    case Bytecodes::_fast_fputfield: __ str_s(S0_tos, Address(Robj, Roffset)); break;
-    case Bytecodes::_fast_dputfield: __ str_d(D0_tos, Address(Robj, Roffset)); break;
-#else
     case Bytecodes::_fast_lputfield: __ add(Robj, Robj, Roffset);
                                      __ stmia(Robj, RegisterSet(R0_tos_lo, R1_tos_hi)); break;
 
@@ -3894,7 +3458,6 @@ void TemplateTable::fast_storefield(TosState state) {
     case Bytecodes::_fast_dputfield: __ add(Robj, Robj, Roffset);
                                      __ fstd(D0_tos, Address(Robj));          break;
 #endif // __SOFTFP__
-#endif // AARCH64
 
     case Bytecodes::_fast_aputfield:
       do_oop_store(_masm, Address(Robj, Roffset), R0_tos, Rtemp, R1_tmp, R2_tmp, false);
@@ -3977,11 +3540,6 @@ void TemplateTable::fast_accessfield(TosState state) {
     case Bytecodes::_fast_sgetfield: __ ldrsh(R0_tos, Address(Robj, Roffset)); break;
     case Bytecodes::_fast_cgetfield: __ ldrh (R0_tos, Address(Robj, Roffset)); break;
     case Bytecodes::_fast_igetfield: __ ldr_s32(R0_tos, Address(Robj, Roffset)); break;
-#ifdef AARCH64
-    case Bytecodes::_fast_lgetfield: __ ldr  (R0_tos, Address(Robj, Roffset)); break;
-    case Bytecodes::_fast_fgetfield: __ ldr_s(S0_tos, Address(Robj, Roffset)); break;
-    case Bytecodes::_fast_dgetfield: __ ldr_d(D0_tos, Address(Robj, Roffset)); break;
-#else
     case Bytecodes::_fast_lgetfield: __ add(Roffset, Robj, Roffset);
                                      __ ldmia(Roffset, RegisterSet(R0_tos_lo, R1_tos_hi)); break;
 #ifdef __SOFTFP__
@@ -3992,7 +3550,6 @@ void TemplateTable::fast_accessfield(TosState state) {
     case Bytecodes::_fast_fgetfield: __ add(Roffset, Robj, Roffset); __ flds(S0_tos, Address(Roffset)); break;
     case Bytecodes::_fast_dgetfield: __ add(Roffset, Robj, Roffset); __ fldd(D0_tos, Address(Roffset)); break;
 #endif // __SOFTFP__
-#endif // AARCH64
     case Bytecodes::_fast_agetfield: do_oop_load(_masm, R0_tos, Address(Robj, Roffset)); __ verify_oop(R0_tos); break;
     default:
       ShouldNotReachHere();
@@ -4003,7 +3560,6 @@ void TemplateTable::fast_accessfield(TosState state) {
     Label notVolatile;
     __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
 
-    // TODO-AARCH64 on AArch64, load-acquire instructions can be used to get rid of this explict barrier
     volatile_barrier(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp);
 
     __ bind(notVolatile);
@@ -4041,56 +3597,22 @@ void TemplateTable::fast_xaccess(TosState state) {
   __ null_check(Robj, Rtemp);
   __ sub(Rbcp, Rbcp, 1);
 
-#ifdef AARCH64
-  if (gen_volatile_check) {
-    Label notVolatile;
-    __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
-
-    __ add(Rtemp, Robj, Roffset);
-
-    if (state == itos) {
-      __ ldar_w(R0_tos, Rtemp);
-    } else if (state == atos) {
-      if (UseCompressedOops) {
-        __ ldar_w(R0_tos, Rtemp);
-        __ decode_heap_oop(R0_tos);
-      } else {
-        __ ldar(R0_tos, Rtemp);
-      }
-      __ verify_oop(R0_tos);
-    } else if (state == ftos) {
-      __ ldar_w(R0_tos, Rtemp);
-      __ fmov_sw(S0_tos, R0_tos);
-    } else {
-      ShouldNotReachHere();
-    }
-    __ b(done);
-
-    __ bind(notVolatile);
-  }
-#endif // AARCH64
-
   if (state == itos) {
     __ ldr_s32(R0_tos, Address(Robj, Roffset));
   } else if (state == atos) {
     do_oop_load(_masm, R0_tos, Address(Robj, Roffset));
     __ verify_oop(R0_tos);
   } else if (state == ftos) {
-#ifdef AARCH64
-    __ ldr_s(S0_tos, Address(Robj, Roffset));
-#else
 #ifdef __SOFTFP__
     __ ldr(R0_tos, Address(Robj, Roffset));
 #else
     __ add(Roffset, Robj, Roffset);
     __ flds(S0_tos, Address(Roffset));
 #endif // __SOFTFP__
-#endif // AARCH64
   } else {
     ShouldNotReachHere();
   }
 
-#ifndef AARCH64
   if (gen_volatile_check) {
     // Check for volatile load
     Label notVolatile;
@@ -4100,7 +3622,6 @@ void TemplateTable::fast_xaccess(TosState state) {
 
     __ bind(notVolatile);
   }
-#endif // !AARCH64
 
   __ bind(done);
 }
@@ -4370,7 +3891,6 @@ void TemplateTable::invokeinterface(int byte_no) {
 void TemplateTable::invokehandle(int byte_no) {
   transition(vtos, vtos);
 
-  // TODO-AARCH64 review register usage
   const Register Rrecv  = R2_tmp;
   const Register Rmtype = R4_tmp;
   const Register R5_method = R5_tmp;  // can't reuse Rmethod!
@@ -4392,7 +3912,6 @@ void TemplateTable::invokehandle(int byte_no) {
 void TemplateTable::invokedynamic(int byte_no) {
   transition(vtos, vtos);
 
-  // TODO-AARCH64 review register usage
   const Register Rcallsite = R4_tmp;
   const Register R5_method = R5_tmp;  // can't reuse Rmethod!
 
@@ -4448,15 +3967,10 @@ void TemplateTable::_new() {
   const int tags_offset = Array<u1>::base_offset_in_bytes();
   __ add(Rtemp, Rtags, Rindex);
 
-#ifdef AARCH64
-  __ add(Rtemp, Rtemp, tags_offset);
-  __ ldarb(Rtemp, Rtemp);
-#else
   __ ldrb(Rtemp, Address(Rtemp, tags_offset));
 
   // use Rklass as a scratch
   volatile_barrier(MacroAssembler::LoadLoad, Rklass);
-#endif // AARCH64
 
   // get InstanceKlass
   __ cmp(Rtemp, JVM_CONSTANT_Class);
@@ -4521,11 +4035,7 @@ void TemplateTable::_new() {
       Label retry;
       __ bind(retry);
 
-#ifdef AARCH64
-      __ ldxr(Robj, Rheap_top_addr);
-#else
       __ ldr(Robj, Address(Rheap_top_addr));
-#endif // AARCH64
 
       __ ldr(Rheap_end, Address(Rheap_top_addr, (intptr_t)Universe::heap()->end_addr()-(intptr_t)Universe::heap()->top_addr()));
       __ add(Rheap_top, Robj, Rsize);
@@ -4534,13 +4044,8 @@ void TemplateTable::_new() {
 
       // Update heap top atomically.
       // If someone beats us on the allocation, try again, otherwise continue.
-#ifdef AARCH64
-      __ stxr(Rtemp2, Rheap_top, Rheap_top_addr);
-      __ cbnz_w(Rtemp2, retry);
-#else
       __ atomic_cas_bool(Robj, Rheap_top, Rheap_top_addr, 0, Rheap_end/*scratched*/);
       __ b(retry, ne);
-#endif // AARCH64
 
       __ incr_allocated_bytes(Rsize, Rtemp);
     }
@@ -4569,21 +4074,6 @@ void TemplateTable::_new() {
     __ bind(L);
 #endif
 
-#ifdef AARCH64
-    {
-      Label loop;
-      // Step back by 1 word if object size is not a multiple of 2*wordSize.
-      assert(wordSize <= sizeof(oopDesc), "oop header should contain at least one word");
-      __ andr(Rtemp2, Rsize, (uintx)wordSize);
-      __ sub(Rzero_cur, Rzero_cur, Rtemp2);
-
-      // Zero by 2 words per iteration.
-      __ bind(loop);
-      __ subs(Rsize, Rsize, 2*wordSize);
-      __ stp(ZR, ZR, Address(Rzero_cur, 2*wordSize, post_indexed));
-      __ b(loop, gt);
-    }
-#else
     __ mov(Rzero0, 0);
     __ mov(Rzero1, 0);
     __ add(Rzero_end, Rzero_cur, Rsize);
@@ -4600,7 +4090,6 @@ void TemplateTable::_new() {
       __ cmp(Rzero_cur, Rzero_end, ne);
       __ b(loop, ne);
     }
-#endif // AARCH64
 
     // initialize object header only.
     __ bind(initialize_header);
@@ -4613,9 +4102,6 @@ void TemplateTable::_new() {
     __ str(Rtemp, Address(Robj, oopDesc::mark_offset_in_bytes()));
 
     // klass
-#ifdef AARCH64
-    __ store_klass_gap(Robj);
-#endif // AARCH64
     __ store_klass(Rklass, Robj); // blows Rklass:
     Rklass = noreg;
 
@@ -4706,19 +4192,11 @@ void TemplateTable::checkcast() {
 
   // See if bytecode has already been quicked
   __ add(Rtemp, Rtags, Rindex);
-#ifdef AARCH64
-  // TODO-AARCH64: investigate if LoadLoad barrier is needed here or control dependency is enough
-  __ add(Rtemp, Rtemp, Array<u1>::base_offset_in_bytes());
-  __ ldarb(Rtemp, Rtemp); // acts as LoadLoad memory barrier
-#else
   __ ldrb(Rtemp, Address(Rtemp, Array<u1>::base_offset_in_bytes()));
-#endif // AARCH64
 
   __ cmp(Rtemp, JVM_CONSTANT_Class);
 
-#ifndef AARCH64
   volatile_barrier(MacroAssembler::LoadLoad, Rtemp, true);
-#endif // !AARCH64
 
   __ b(quicked, eq);
 
@@ -4786,18 +4264,10 @@ void TemplateTable::instanceof() {
 
   // See if bytecode has already been quicked
   __ add(Rtemp, Rtags, Rindex);
-#ifdef AARCH64
-  // TODO-AARCH64: investigate if LoadLoad barrier is needed here or control dependency is enough
-  __ add(Rtemp, Rtemp, Array<u1>::base_offset_in_bytes());
-  __ ldarb(Rtemp, Rtemp); // acts as LoadLoad memory barrier
-#else
   __ ldrb(Rtemp, Address(Rtemp, Array<u1>::base_offset_in_bytes()));
-#endif // AARCH64
   __ cmp(Rtemp, JVM_CONSTANT_Class);
 
-#ifndef AARCH64
   volatile_barrier(MacroAssembler::LoadLoad, Rtemp, true);
-#endif // !AARCH64
 
   __ b(quicked, eq);
 
@@ -4853,11 +4323,7 @@ void TemplateTable::_breakpoint() {
   __ mov(R1, Rmethod);
   __ mov(R2, Rbcp);
   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), R1, R2);
-#ifdef AARCH64
-  __ sxtw(Rtmp_save0, R0);
-#else
   __ mov(Rtmp_save0, R0);
-#endif // AARCH64
 
   // post the breakpoint event
   __ mov(R1, Rmethod);
@@ -4929,16 +4395,11 @@ void TemplateTable::monitorenter() {
                                  // points to word before bottom of monitor block
 
     __ cmp(Rcur, Rbottom);                       // check if there are no monitors
-#ifndef AARCH64
     __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                                  // prefetch monitor's object for the first iteration
-#endif // !AARCH64
     __ b(allocate_monitor, eq);                  // there are no monitors, skip searching
 
     __ bind(loop);
-#ifdef AARCH64
-    __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
-#endif // AARCH64
     __ cmp(Rcur_obj, 0);                         // check if current entry is used
     __ mov(Rentry, Rcur, eq);                    // if not used then remember entry
 
@@ -4948,10 +4409,8 @@ void TemplateTable::monitorenter() {
     __ add(Rcur, Rcur, entry_size);              // otherwise advance to next entry
 
     __ cmp(Rcur, Rbottom);                       // check if bottom reached
-#ifndef AARCH64
     __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                                  // prefetch monitor's object for the next iteration
-#endif // !AARCH64
     __ b(loop, ne);                              // if not at bottom then check this entry
     __ bind(exit);
   }
@@ -4966,12 +4425,6 @@ void TemplateTable::monitorenter() {
 
     // 1. compute new pointers
 
-#ifdef AARCH64
-    __ check_extended_sp(Rtemp);
-    __ sub(SP, SP, entry_size);                  // adjust extended SP
-    __ mov(Rtemp, SP);
-    __ str(Rtemp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
-#endif // AARCH64
 
     __ ldr(Rentry, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
                                                  // old monitor block top / expression stack bottom
@@ -4989,21 +4442,14 @@ void TemplateTable::monitorenter() {
     // 2. move expression stack contents
 
     __ cmp(R2_tmp, Rentry);                                 // check if expression stack is empty
-#ifndef AARCH64
     __ ldr(Rtemp, Address(R2_tmp, entry_size), ne);         // load expression stack word from old location
-#endif // !AARCH64
     __ b(allocated, eq);
 
     __ bind(loop);
-#ifdef AARCH64
-    __ ldr(Rtemp, Address(R2_tmp, entry_size));             // load expression stack word from old location
-#endif // AARCH64
     __ str(Rtemp, Address(R2_tmp, wordSize, post_indexed)); // store expression stack word at new location
                                                             // and advance to next word
     __ cmp(R2_tmp, Rentry);                                 // check if bottom reached
-#ifndef AARCH64
     __ ldr(Rtemp, Address(R2, entry_size), ne);             // load expression stack word from old location
-#endif // !AARCH64
     __ b(loop, ne);                                         // if not at bottom then copy next word
   }
 
@@ -5052,24 +4498,17 @@ void TemplateTable::monitorexit() {
                                  // points to word before bottom of monitor block
 
     __ cmp(Rcur, Rbottom);                       // check if bottom reached
-#ifndef AARCH64
     __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                                  // prefetch monitor's object for the first iteration
-#endif // !AARCH64
     __ b(throw_exception, eq);                   // throw exception if there are now monitors
 
     __ bind(loop);
-#ifdef AARCH64
-    __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
-#endif // AARCH64
     // check if current entry is for same object
     __ cmp(Rcur_obj, Robj);
     __ b(found, eq);                             // if same object then stop searching
     __ add(Rcur, Rcur, entry_size);              // otherwise advance to next entry
     __ cmp(Rcur, Rbottom);                       // check if bottom reached
-#ifndef AARCH64
     __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
-#endif // !AARCH64
     __ b (loop, ne);                             // if not at bottom then check this entry
   }
 
diff --git a/src/hotspot/cpu/arm/vm_version_arm.hpp b/src/hotspot/cpu/arm/vm_version_arm.hpp
index 06508e9a9e7..79f47913e55 100644
--- a/src/hotspot/cpu/arm/vm_version_arm.hpp
+++ b/src/hotspot/cpu/arm/vm_version_arm.hpp
@@ -41,24 +41,6 @@ class VM_Version: public Abstract_VM_Version {
   static void initialize();
   static bool is_initialized()      { return _is_initialized; }
 
-#ifdef AARCH64
-
- public:
-  static bool supports_ldrex()         { return true; }
-  static bool supports_ldrexd()        { return true; }
-  static bool supports_movw()          { return true; }
-
-  // Override Abstract_VM_Version implementation
-  static bool use_biased_locking();
-
-  static bool has_simd()               { return _has_simd; }
-  static bool has_vfp()                { return has_simd(); }
-  static bool simd_math_is_compliant() { return true; }
-  static bool has_multiprocessing_extensions() { return true; }
-
-  static bool prefer_moves_over_load_literal() { return true; }
-
-#else
 
  protected:
   enum Feature_Flag {
@@ -126,7 +108,6 @@ class VM_Version: public Abstract_VM_Version {
 
   friend class VM_Version_StubGenerator;
 
-#endif // AARCH64
 };
 
 #endif // CPU_ARM_VM_VM_VERSION_ARM_HPP
diff --git a/src/hotspot/cpu/arm/vm_version_arm_64.cpp b/src/hotspot/cpu/arm/vm_version_arm_64.cpp
deleted file mode 100644
index 43da23b7fab..00000000000
--- a/src/hotspot/cpu/arm/vm_version_arm_64.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "jvm.h"
-#include "asm/macroAssembler.inline.hpp"
-#include "memory/resourceArea.hpp"
-#include "runtime/java.hpp"
-#include "runtime/os.inline.hpp"
-#include "runtime/stubCodeGenerator.hpp"
-#include "vm_version_arm.hpp"
-#include <sys/auxv.h>
-#include <asm/hwcap.h>
-
-#ifndef HWCAP_AES
-#define HWCAP_AES 1 << 3
-#endif
-
-bool VM_Version::_is_initialized = false;
-bool VM_Version::_has_simd = false;
-
-extern "C" {
-  typedef bool (*check_simd_t)();
-}
-
-
-#ifdef COMPILER2
-
-#define __ _masm->
-
-class VM_Version_StubGenerator: public StubCodeGenerator {
- public:
-
-  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
-
-  address generate_check_simd() {
-    StubCodeMark mark(this, "VM_Version", "check_simd");
-    address start = __ pc();
-
-    __ vcnt(Stemp, Stemp);
-    __ mov(R0, 1);
-    __ ret(LR);
-
-    return start;
-  };
-};
-
-#undef __
-
-#endif
-
-
-
-extern "C" address check_simd_fault_instr;
-
-
-void VM_Version::initialize() {
-  ResourceMark rm;
-
-  // Making this stub must be FIRST use of assembler
-  const int stub_size = 128;
-  BufferBlob* stub_blob = BufferBlob::create("get_cpu_info", stub_size);
-  if (stub_blob == NULL) {
-    vm_exit_during_initialization("Unable to allocate get_cpu_info stub");
-  }
-
-  if (UseFMA) {
-    warning("FMA instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(UseFMA, false);
-  }
-
-  if (UseSHA) {
-    warning("SHA instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(UseSHA, false);
-  }
-
-  if (UseSHA1Intrinsics) {
-    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
-    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
-  }
-
-  if (UseSHA256Intrinsics) {
-    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
-    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
-  }
-
-  if (UseSHA512Intrinsics) {
-    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
-    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
-  }
-
-  if (UseCRC32Intrinsics) {
-    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
-      warning("CRC32 intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
-  }
-
-  if (UseCRC32CIntrinsics) {
-    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
-      warning("CRC32C intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
-  }
-
-  if (UseAdler32Intrinsics) {
-    warning("Adler32 intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
-  }
-
-  if (UseVectorizedMismatchIntrinsic) {
-    warning("vectorizedMismatch intrinsic is not available on this CPU.");
-    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
-  }
-
-  CodeBuffer c(stub_blob);
-
-#ifdef COMPILER2
-  VM_Version_StubGenerator g(&c);
-
-  address check_simd_pc = g.generate_check_simd();
-  if (check_simd_pc != NULL) {
-    check_simd_t check_simd = CAST_TO_FN_PTR(check_simd_t, check_simd_pc);
-    check_simd_fault_instr = (address)check_simd;
-    _has_simd = check_simd();
-  } else {
-    assert(! _has_simd, "default _has_simd value must be 'false'");
-  }
-#endif
-
-  unsigned long auxv = getauxval(AT_HWCAP);
-
-  char buf[512];
-  jio_snprintf(buf, sizeof(buf), "AArch64%s",
-               ((auxv & HWCAP_AES) ? ", aes" : ""));
-
-  _features_string = os::strdup(buf);
-
-#ifdef COMPILER2
-  if (auxv & HWCAP_AES) {
-    if (FLAG_IS_DEFAULT(UseAES)) {
-      FLAG_SET_DEFAULT(UseAES, true);
-    }
-    if (!UseAES) {
-      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
-        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
-      }
-      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
-    } else {
-      if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
-        FLAG_SET_DEFAULT(UseAESIntrinsics, true);
-      }
-    }
-  } else
-#endif
-  if (UseAES || UseAESIntrinsics) {
-    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
-      warning("AES instructions are not available on this CPU");
-      FLAG_SET_DEFAULT(UseAES, false);
-    }
-    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
-      warning("AES intrinsics are not available on this CPU");
-      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
-    }
-  }
-
-  if (UseAESCTRIntrinsics) {
-    warning("AES/CTR intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
-  }
-
-  _supports_cx8 = true;
-  _supports_atomic_getset4 = true;
-  _supports_atomic_getadd4 = true;
-  _supports_atomic_getset8 = true;
-  _supports_atomic_getadd8 = true;
-
-  // TODO-AARCH64 revise C2 flags
-
-  if (has_simd()) {
-    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
-      FLAG_SET_DEFAULT(UsePopCountInstruction, true);
-    }
-  }
-
-  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
-    FLAG_SET_DEFAULT(AllocatePrefetchDistance, 128);
-  }
-
-#ifdef COMPILER2
-  FLAG_SET_DEFAULT(UseFPUForSpilling, true);
-
-  if (FLAG_IS_DEFAULT(MaxVectorSize)) {
-    // FLAG_SET_DEFAULT(MaxVectorSize, has_simd() ? 16 : 8);
-    // SIMD/NEON can use 16, but default is 8 because currently
-    // larger than 8 will disable instruction scheduling
-    FLAG_SET_DEFAULT(MaxVectorSize, 8);
-  }
-
-  if (MaxVectorSize > 16) {
-    FLAG_SET_DEFAULT(MaxVectorSize, 8);
-  }
-#endif
-
-  if (FLAG_IS_DEFAULT(Tier4CompileThreshold)) {
-    Tier4CompileThreshold = 10000;
-  }
-  if (FLAG_IS_DEFAULT(Tier3InvocationThreshold)) {
-    Tier3InvocationThreshold = 1000;
-  }
-  if (FLAG_IS_DEFAULT(Tier3CompileThreshold)) {
-    Tier3CompileThreshold = 5000;
-  }
-  if (FLAG_IS_DEFAULT(Tier3MinInvocationThreshold)) {
-    Tier3MinInvocationThreshold = 500;
-  }
-
-  FLAG_SET_DEFAULT(TypeProfileLevel, 0); // unsupported
-
-  // This machine does not allow unaligned memory accesses
-  if (UseUnalignedAccesses) {
-    if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
-      warning("Unaligned memory access is not available on this CPU");
-    FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
-  }
-
-  _is_initialized = true;
-}
-
-bool VM_Version::use_biased_locking() {
-  // TODO-AARCH64 measure performance and revise
-
-  // The cost of CAS on uniprocessor ARM v6 and later is low compared to the
-  // overhead related to slightly longer Biased Locking execution path.
-  // Testing shows no improvement when running with Biased Locking enabled
-  // on an ARMv6 and higher uniprocessor systems.  The situation is different on
-  // ARMv5 and MP systems.
-  //
-  // Therefore the Biased Locking is enabled on ARMv5 and ARM MP only.
-  //
-  return os::is_MP();
-}
diff --git a/src/hotspot/cpu/arm/vm_version_ext_arm.cpp b/src/hotspot/cpu/arm/vm_version_ext_arm.cpp
index d653964e33e..0e2a1ffa136 100644
--- a/src/hotspot/cpu/arm/vm_version_ext_arm.cpp
+++ b/src/hotspot/cpu/arm/vm_version_ext_arm.cpp
@@ -49,11 +49,7 @@ void VM_Version_Ext::initialize_cpu_information(void) {
   _no_of_cores  = os::processor_count();
   _no_of_threads = _no_of_cores;
   _no_of_sockets = _no_of_cores;
-#ifdef AARCH64
-  snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "AArch64");
-#else
   snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "ARM%d", _arm_arch);
-#endif
   snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _features_string);
   _initialized = true;
 }
diff --git a/src/hotspot/cpu/arm/vtableStubs_arm.cpp b/src/hotspot/cpu/arm/vtableStubs_arm.cpp
index 591a021c050..2c564b81895 100644
--- a/src/hotspot/cpu/arm/vtableStubs_arm.cpp
+++ b/src/hotspot/cpu/arm/vtableStubs_arm.cpp
@@ -92,7 +92,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
     int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset;
 
     assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned");
-    int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff);
+    int offset_mask = 0xfff;
     if (method_offset & ~offset_mask) {
       __ add(tmp, tmp, method_offset & ~offset_mask);
     }
@@ -109,12 +109,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
 #endif
 
   address ame_addr = __ pc();
-#ifdef AARCH64
-  __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
-  __ br(tmp);
-#else
   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
-#endif // AARCH64
 
   masm->flush();
   bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
@@ -150,9 +145,9 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0");
 
   // R0-R3 / R0-R7 registers hold the arguments and cannot be spoiled
-  const Register Rclass  = AARCH64_ONLY(R9)  NOT_AARCH64(R4);
-  const Register Rintf   = AARCH64_ONLY(R10) NOT_AARCH64(R5);
-  const Register Rscan   = AARCH64_ONLY(R11) NOT_AARCH64(R6);
+  const Register Rclass  = R4;
+  const Register Rintf   = R5;
+  const Register Rscan   = R6;
 
   Label L_no_such_interface;
 
@@ -200,12 +195,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
 
   address ame_addr = __ pc();
 
-#ifdef AARCH64
-  __ ldr(Rtemp, Address(Rmethod, Method::from_compiled_offset()));
-  __ br(Rtemp);
-#else
   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
-#endif // AARCH64
 
   __ bind(L_no_such_interface);
   // Handle IncompatibleClassChangeError in itable stubs.
diff --git a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
index 893db24e18e..709fe1e9c40 100644
--- a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
@@ -44,7 +44,6 @@
  * kernel source or kernel_user_helpers.txt in Linux Doc.
  */
 
-#ifndef AARCH64
 template<>
 template<typename T>
 inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
@@ -61,18 +60,9 @@ inline void Atomic::PlatformStore<8>::operator()(T store_value,
   (*os::atomic_store_long_func)(
     PrimitiveConversions::cast<int64_t>(store_value), reinterpret_cast<volatile int64_t*>(dest));
 }
-#endif
 
 // As per atomic.hpp all read-modify-write operations have to provide two-way
-// barriers semantics. For AARCH64 we are using load-acquire-with-reservation and
-// store-release-with-reservation. While load-acquire combined with store-release
-// do not generally form two-way barriers, their use with reservations does - the
-// ARMv8 architecture manual Section F "Barrier Litmus Tests" indicates they
-// provide sequentially consistent semantics. All we need to add is an explicit
-// barrier in the failure path of the cmpxchg operations (as these don't execute
-// the store) - arguably this may be overly cautious as there is a very low
-// likelihood that the hardware would pull loads/stores into the region guarded
-// by the reservation.
+// barrier semantics.
 //
 // For ARMv7 we add explicit barriers in the stubs.
 
@@ -90,45 +80,9 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                                atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(I));
   STATIC_ASSERT(4 == sizeof(D));
-#ifdef AARCH64
-  D val;
-  int tmp;
-  __asm__ volatile(
-    "1:\n\t"
-    " ldaxr %w[val], [%[dest]]\n\t"
-    " add %w[val], %w[val], %w[add_val]\n\t"
-    " stlxr %w[tmp], %w[val], [%[dest]]\n\t"
-    " cbnz %w[tmp], 1b\n\t"
-    : [val] "=&r" (val), [tmp] "=&r" (tmp)
-    : [add_val] "r" (add_value), [dest] "r" (dest)
-    : "memory");
-  return val;
-#else
   return add_using_helper<int32_t>(os::atomic_add_func, add_value, dest);
-#endif
 }
 
-#ifdef AARCH64
-template<>
-template<typename I, typename D>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
-                                               atomic_memory_order order) const {
-  STATIC_ASSERT(8 == sizeof(I));
-  STATIC_ASSERT(8 == sizeof(D));
-  D val;
-  int tmp;
-  __asm__ volatile(
-    "1:\n\t"
-    " ldaxr %[val], [%[dest]]\n\t"
-    " add %[val], %[val], %[add_val]\n\t"
-    " stlxr %w[tmp], %[val], [%[dest]]\n\t"
-    " cbnz %w[tmp], 1b\n\t"
-    : [val] "=&r" (val), [tmp] "=&r" (tmp)
-    : [add_val] "r" (add_value), [dest] "r" (dest)
-    : "memory");
-  return val;
-}
-#endif
 
 template<>
 template<typename T>
@@ -136,43 +90,9 @@ inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                              T volatile* dest,
                                              atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
-#ifdef AARCH64
-  T old_val;
-  int tmp;
-  __asm__ volatile(
-    "1:\n\t"
-    " ldaxr %w[old_val], [%[dest]]\n\t"
-    " stlxr %w[tmp], %w[new_val], [%[dest]]\n\t"
-    " cbnz %w[tmp], 1b\n\t"
-    : [old_val] "=&r" (old_val), [tmp] "=&r" (tmp)
-    : [new_val] "r" (exchange_value), [dest] "r" (dest)
-    : "memory");
-  return old_val;
-#else
   return xchg_using_helper<int32_t>(os::atomic_xchg_func, exchange_value, dest);
-#endif
 }
 
-#ifdef AARCH64
-template<>
-template<typename T>
-inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
-                                             T volatile* dest,
-                                             atomic_memory_order order) const {
-  STATIC_ASSERT(8 == sizeof(T));
-  T old_val;
-  int tmp;
-  __asm__ volatile(
-    "1:\n\t"
-    " ldaxr %[old_val], [%[dest]]\n\t"
-    " stlxr %w[tmp], %[new_val], [%[dest]]\n\t"
-    " cbnz %w[tmp], 1b\n\t"
-    : [old_val] "=&r" (old_val), [tmp] "=&r" (tmp)
-    : [new_val] "r" (exchange_value), [dest] "r" (dest)
-    : "memory");
-  return old_val;
-}
-#endif // AARCH64
 
 // The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering
 
@@ -180,7 +100,6 @@ inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
 template<>
 struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
 
-#ifndef AARCH64
 
 inline int32_t reorder_cmpxchg_func(int32_t exchange_value,
                                     int32_t volatile* dest,
@@ -197,7 +116,6 @@ inline int64_t reorder_cmpxchg_long_func(int64_t exchange_value,
   return (*os::atomic_cmpxchg_long_func)(compare_value, exchange_value, dest);
 }
 
-#endif // !AARCH64
 
 template<>
 template<typename T>
@@ -206,27 +124,7 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                 T compare_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(4 == sizeof(T));
-#ifdef AARCH64
-  T rv;
-  int tmp;
-  __asm__ volatile(
-    "1:\n\t"
-    " ldaxr %w[rv], [%[dest]]\n\t"
-    " cmp %w[rv], %w[cv]\n\t"
-    " b.ne 2f\n\t"
-    " stlxr %w[tmp], %w[ev], [%[dest]]\n\t"
-    " cbnz %w[tmp], 1b\n\t"
-    " b 3f\n\t"
-    "2:\n\t"
-    " dmb sy\n\t"
-    "3:\n\t"
-    : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
-    : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
-    : "memory");
-  return rv;
-#else
   return cmpxchg_using_helper<int32_t>(reorder_cmpxchg_func, exchange_value, dest, compare_value);
-#endif
 }
 
 template<>
@@ -236,27 +134,7 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                 T compare_value,
                                                 atomic_memory_order order) const {
   STATIC_ASSERT(8 == sizeof(T));
-#ifdef AARCH64
-  T rv;
-  int tmp;
-  __asm__ volatile(
-    "1:\n\t"
-    " ldaxr %[rv], [%[dest]]\n\t"
-    " cmp %[rv], %[cv]\n\t"
-    " b.ne 2f\n\t"
-    " stlxr %w[tmp], %[ev], [%[dest]]\n\t"
-    " cbnz %w[tmp], 1b\n\t"
-    " b 3f\n\t"
-    "2:\n\t"
-    " dmb sy\n\t"
-    "3:\n\t"
-    : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
-    : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
-    : "memory");
-  return rv;
-#else
   return cmpxchg_using_helper<int64_t>(reorder_cmpxchg_long_func, exchange_value, dest, compare_value);
-#endif
 }
 
 #endif // OS_CPU_LINUX_ARM_VM_ATOMIC_LINUX_ARM_HPP
diff --git a/src/hotspot/os_cpu/linux_arm/copy_linux_arm.inline.hpp b/src/hotspot/os_cpu/linux_arm/copy_linux_arm.inline.hpp
index f9f7fddb2b2..06d91b928e1 100644
--- a/src/hotspot/os_cpu/linux_arm/copy_linux_arm.inline.hpp
+++ b/src/hotspot/os_cpu/linux_arm/copy_linux_arm.inline.hpp
@@ -58,37 +58,18 @@ static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t co
 }
 
 static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
-#ifdef AARCH64
-  _Copy_conjoint_jints_atomic(from, to, count * BytesPerInt);
-#else
   assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
   // pd_conjoint_words is word-atomic in this implementation.
   pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count);
-#endif
 }
 
 static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
-#ifdef AARCH64
-  assert(HeapWordSize == BytesPerLong, "64-bit architecture");
-  pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count);
-#else
   _Copy_conjoint_jlongs_atomic(from, to, count * BytesPerLong);
-#endif
 }
 
 static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
-#ifdef AARCH64
-  if (UseCompressedOops) {
-    assert(BytesPerHeapOop == BytesPerInt, "compressed oops");
-    pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count);
-  } else {
-    assert(BytesPerHeapOop == BytesPerLong, "64-bit architecture");
-    pd_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
-  }
-#else
   assert(BytesPerHeapOop == BytesPerInt, "32-bit architecture");
   pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count);
-#endif
 }
 
 static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
diff --git a/src/hotspot/os_cpu/linux_arm/globals_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/globals_linux_arm.hpp
index 78a713a053c..497322093a4 100644
--- a/src/hotspot/os_cpu/linux_arm/globals_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/globals_linux_arm.hpp
@@ -30,16 +30,10 @@
 // (see globals.hpp)
 //
 define_pd_global(bool, DontYieldALot,            false);
-#ifdef AARCH64
-define_pd_global(intx, CompilerThreadStackSize,  1024);
-define_pd_global(intx, ThreadStackSize,          1024);
-define_pd_global(intx, VMThreadStackSize,        1024);
-#else
 define_pd_global(intx, CompilerThreadStackSize,  512);
 // System default ThreadStackSize appears to be 512 which is too big.
 define_pd_global(intx, ThreadStackSize,          320);
 define_pd_global(intx, VMThreadStackSize,        512);
-#endif // AARCH64
 
 define_pd_global(size_t, JVMInvokeMethodSlack,   8192);
 
diff --git a/src/hotspot/os_cpu/linux_arm/linux_arm_32.s b/src/hotspot/os_cpu/linux_arm/linux_arm_32.s
index ededd0fe6e3..c1c8fd42815 100644
--- a/src/hotspot/os_cpu/linux_arm/linux_arm_32.s
+++ b/src/hotspot/os_cpu/linux_arm/linux_arm_32.s
@@ -1,4 +1,4 @@
-# 
+#
 # Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
@@ -19,15 +19,15 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-# 
+#
+
 
-	
         # NOTE WELL!  The _Copy functions are called directly
 	# from server-compiler-generated code via CallLeafNoFP,
 	# which means that they *must* either not use floating
 	# point or use it in the same manner as does the server
 	# compiler.
-	
+
         .globl _Copy_conjoint_bytes
 	.type _Copy_conjoint_bytes, %function
         .globl _Copy_arrayof_conjoint_bytes
diff --git a/src/hotspot/os_cpu/linux_arm/linux_arm_64.s b/src/hotspot/os_cpu/linux_arm/linux_arm_64.s
deleted file mode 100644
index ab861293da6..00000000000
--- a/src/hotspot/os_cpu/linux_arm/linux_arm_64.s
+++ /dev/null
@@ -1,542 +0,0 @@
-# 
-# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-# 
-
-        # TODO-AARCH64
-        
-        # NOTE WELL!  The _Copy functions are called directly
-        # from server-compiler-generated code via CallLeafNoFP,
-        # which means that they *must* either not use floating
-        # point or use it in the same manner as does the server
-        # compiler.
-        
-        .globl _Copy_conjoint_bytes
-        .type _Copy_conjoint_bytes, %function
-        .globl _Copy_arrayof_conjoint_bytes
-        .type _Copy_arrayof_conjoint_bytes, %function
-        .globl _Copy_disjoint_words
-        .type _Copy_disjoint_words, %function
-        .globl _Copy_conjoint_words
-        .type _Copy_conjoint_words, %function
-        .globl _Copy_conjoint_jshorts_atomic
-        .type _Copy_conjoint_jshorts_atomic, %function
-        .globl _Copy_arrayof_conjoint_jshorts
-        .type _Copy_arrayof_conjoint_jshorts, %function
-        .globl _Copy_conjoint_jints_atomic
-        .type _Copy_conjoint_jints_atomic, %function
-        .globl _Copy_arrayof_conjoint_jints
-        .type _Copy_arrayof_conjoint_jints, %function
-        .globl _Copy_conjoint_jlongs_atomic
-        .type _Copy_conjoint_jlongs_atomic, %function
-        .globl _Copy_arrayof_conjoint_jlongs
-        .type _Copy_arrayof_conjoint_jlongs, %function
-
-        .text
-        .globl  SpinPause
-        .type SpinPause, %function
-SpinPause:
-        yield
-        ret
-
-        # Support for void Copy::conjoint_bytes(void* from,
-        #                                       void* to,
-        #                                       size_t count)
-_Copy_conjoint_bytes:
-        hlt 1002
-
-        # Support for void Copy::arrayof_conjoint_bytes(void* from,
-        #                                               void* to,
-        #                                               size_t count)
-_Copy_arrayof_conjoint_bytes:
-        hlt 1003
-
-
-        # Support for void Copy::disjoint_words(void* from,
-        #                                       void* to,
-        #                                       size_t count)
-_Copy_disjoint_words:
-        # These and further memory prefetches may hit out of array ranges.
-        # Experiments showed that prefetching of inaccessible memory doesn't result in exceptions.
-        prfm    pldl1keep,  [x0, #0]
-        prfm    pstl1keep,  [x1, #0]
-        prfm    pldl1keep,  [x0, #64]
-        prfm    pstl1keep,  [x1, #64]
-
-        subs    x18, x2,  #128
-        b.ge    dw_large
-
-dw_lt_128:
-        # Copy [x0, x0 + x2) to [x1, x1 + x2)
-        
-        adr     x15,  dw_tail_table_base
-        and     x16,  x2,  #~8
-
-        # Calculate address to jump and store it to x15:
-        #   Each pair of instructions before dw_tail_table_base copies 16 bytes.
-        #   x16 is count of bytes to copy aligned down by 16.
-        #   So x16/16 pairs of instructions should be executed. 
-        #   Each pair takes 8 bytes, so x15 = dw_tail_table_base - (x16/16)*8 = x15 - x16/2
-        sub     x15,  x15, x16, lsr #1
-        prfm    plil1keep, [x15]
-    
-        add     x17,  x0,  x2
-        add     x18,  x1,  x2
-
-        # If x2 = x16 + 8, then copy 8 bytes and x16 bytes after that.
-        # Otherwise x2 = x16, so proceed to copy x16 bytes.
-        tbz     x2, #3, dw_lt_128_even
-        ldr     x3, [x0]
-        str     x3, [x1]
-dw_lt_128_even:
-        # Copy [x17 - x16, x17) to [x18 - x16, x18)
-        # x16 is aligned by 16 and less than 128
-
-        # Execute (x16/16) ldp-stp pairs; each pair copies 16 bytes
-        br      x15
-
-        ldp     x3,  x4,  [x17, #-112]
-        stp     x3,  x4,  [x18, #-112]
-        ldp     x5,  x6,  [x17, #-96]
-        stp     x5,  x6,  [x18, #-96]
-        ldp     x7,  x8,  [x17, #-80]
-        stp     x7,  x8,  [x18, #-80]
-        ldp     x9,  x10, [x17, #-64]
-        stp     x9,  x10, [x18, #-64]
-        ldp     x11, x12, [x17, #-48]
-        stp     x11, x12, [x18, #-48]
-        ldp     x13, x14, [x17, #-32]
-        stp     x13, x14, [x18, #-32]
-        ldp     x15, x16, [x17, #-16]
-        stp     x15, x16, [x18, #-16]
-dw_tail_table_base:
-        ret
-
-.p2align  6
-.rept   12
-        nop
-.endr
-dw_large:
-        # x18 >= 0;
-        # Copy [x0, x0 + x18 + 128) to [x1, x1 + x18 + 128)
-
-        ldp     x3,  x4,  [x0], #64
-        ldp     x5,  x6,  [x0, #-48]
-        ldp     x7,  x8,  [x0, #-32]
-        ldp     x9,  x10, [x0, #-16]
-
-        # Before and after each iteration of loop registers x3-x10 contain [x0 - 64, x0),
-        # and x1 is a place to copy this data;
-        # x18 contains number of bytes to be stored minus 128
-
-        # Exactly 16 instructions from p2align, so dw_loop starts from cache line boundary
-        # Checking it explictly by aligning with "hlt 1000" instructions 
-.p2alignl  6, 0xd4407d00
-dw_loop:
-        prfm    pldl1keep,  [x0, #64]
-        # Next line actually hurted memory copy performance (for interpreter) - JDK-8078120
-        # prfm    pstl1keep,  [x1, #64]
-
-        subs    x18, x18, #64
-
-        stp     x3,  x4,  [x1, #0]
-        ldp     x3,  x4,  [x0, #0]
-        stp     x5,  x6,  [x1, #16]
-        ldp     x5,  x6,  [x0, #16]
-        stp     x7,  x8,  [x1, #32]
-        ldp     x7,  x8,  [x0, #32]
-        stp     x9,  x10, [x1, #48]
-        ldp     x9,  x10, [x0, #48]
-        
-        add     x1,  x1,  #64
-        add     x0,  x0,  #64
-
-        b.ge    dw_loop
-
-        # 13 instructions from dw_loop, so the loop body hits into one cache line
-
-dw_loop_end:
-        adds    x2,  x18, #64
-
-        stp     x3,  x4,  [x1], #64
-        stp     x5,  x6,  [x1, #-48]
-        stp     x7,  x8,  [x1, #-32]
-        stp     x9,  x10, [x1, #-16]
-
-        # Increased x18 by 64, but stored 64 bytes, so x2 contains exact number of bytes to be stored
-
-        # If this number is not zero, also copy remaining bytes
-        b.ne    dw_lt_128
-        ret
-
-
-        # Support for void Copy::conjoint_words(void* from,
-        #                                       void* to,
-        #                                       size_t count)
-_Copy_conjoint_words:
-        subs    x3, x1, x0
-        # hi condition is met <=> from < to
-        ccmp    x2, x3, #0, hi
-        # hi condition is met <=> (from < to) and (to - from < count)
-        # otherwise _Copy_disjoint_words may be used, because it performs forward copying,
-        # so it also works when ranges overlap but to <= from
-        b.ls    _Copy_disjoint_words
-
-        # Overlapping case should be the rare one, it does not worth optimizing
-
-        ands    x3,  x2,  #~8
-        # x3 is count aligned down by 2*wordSize
-        add     x0,  x0,  x2
-        add     x1,  x1,  x2
-        sub     x3,  x3,  #16
-        # Skip loop if 0 or 1 words
-        b.eq    cw_backward_loop_end
-
-        # x3 >= 0
-        # Copy [x0 - x3 - 16, x0) to [x1 - x3 - 16, x1) backward
-cw_backward_loop:
-        subs    x3,  x3,  #16
-        ldp     x4,  x5,  [x0, #-16]!
-        stp     x4,  x5,  [x1, #-16]!
-        b.ge    cw_backward_loop
-
-cw_backward_loop_end:
-        # Copy remaining 0 or 1 words
-        tbz     x2,  #3,  cw_finish
-        ldr     x3, [x0, #-8]
-        str     x3, [x1, #-8]
-
-cw_finish:
-        ret
-
-
-        # Support for void Copy::conjoint_jshorts_atomic(void* from,
-        #                                                void* to,
-        #                                                size_t count)
-_Copy_conjoint_jshorts_atomic:
-        add     x17, x0, x2
-        add     x18, x1, x2
-
-        subs    x3, x1, x0
-        # hi is met <=> (from < to) and (to - from < count)
-        ccmp    x2, x3, #0, hi
-        b.hi    cs_backward
-        
-        subs    x3, x2, #14
-        b.ge    cs_forward_loop
-
-        # Copy x2 < 14 bytes from x0 to x1
-cs_forward_lt14:
-        ands    x7, x2, #7
-        tbz     x2, #3, cs_forward_lt8
-        ldrh    w3, [x0, #0]
-        ldrh    w4, [x0, #2]
-        ldrh    w5, [x0, #4]
-        ldrh    w6, [x0, #6]
-
-        strh    w3, [x1, #0]
-        strh    w4, [x1, #2]
-        strh    w5, [x1, #4]
-        strh    w6, [x1, #6]
-
-        # Copy x7 < 8 bytes from x17 - x7 to x18 - x7
-cs_forward_lt8:
-        b.eq    cs_forward_0
-        cmp     x7, #4
-        b.lt    cs_forward_2
-        b.eq    cs_forward_4
-
-cs_forward_6:
-        ldrh    w3, [x17, #-6]
-        strh    w3, [x18, #-6]
-cs_forward_4:
-        ldrh    w4, [x17, #-4]
-        strh    w4, [x18, #-4]
-cs_forward_2:
-        ldrh    w5, [x17, #-2]
-        strh    w5, [x18, #-2]
-cs_forward_0:
-        ret
-
-
-        # Copy [x0, x0 + x3 + 14) to [x1, x1 + x3 + 14)
-        # x3 >= 0
-.p2align 6
-cs_forward_loop:
-        subs    x3, x3, #14
-        
-        ldrh    w4, [x0], #14
-        ldrh    w5, [x0, #-12]
-        ldrh    w6, [x0, #-10]
-        ldrh    w7, [x0, #-8]
-        ldrh    w8, [x0, #-6]
-        ldrh    w9, [x0, #-4]
-        ldrh    w10, [x0, #-2]
-
-        strh    w4, [x1], #14
-        strh    w5, [x1, #-12]
-        strh    w6, [x1, #-10]
-        strh    w7, [x1, #-8]
-        strh    w8, [x1, #-6]
-        strh    w9, [x1, #-4]
-        strh    w10, [x1, #-2]
-
-        b.ge    cs_forward_loop
-        # Exactly 16 instruction from cs_forward_loop, so loop fits into one cache line
-
-        adds    x2, x3, #14
-        # x2 bytes should be copied from x0 to x1
-        b.ne    cs_forward_lt14
-        ret
-        
-        # Very similar to forward copying
-cs_backward:
-        subs    x3, x2, #14
-        b.ge    cs_backward_loop
-
-cs_backward_lt14:
-        ands    x7, x2, #7
-        tbz     x2, #3, cs_backward_lt8
-
-        ldrh    w3, [x17, #-8]
-        ldrh    w4, [x17, #-6]
-        ldrh    w5, [x17, #-4]
-        ldrh    w6, [x17, #-2]
-        
-        strh    w3, [x18, #-8]
-        strh    w4, [x18, #-6]
-        strh    w5, [x18, #-4]
-        strh    w6, [x18, #-2]
-
-cs_backward_lt8:
-        b.eq    cs_backward_0
-        cmp     x7, #4
-        b.lt    cs_backward_2
-        b.eq    cs_backward_4
-
-cs_backward_6:
-        ldrh    w3, [x0, #4]
-        strh    w3, [x1, #4]
-
-cs_backward_4:
-        ldrh    w4, [x0, #2]
-        strh    w4, [x1, #2]
-
-cs_backward_2:
-        ldrh    w5, [x0, #0]
-        strh    w5, [x1, #0]
-
-cs_backward_0:
-        ret
-
-
-.p2align 6
-cs_backward_loop:
-        subs    x3, x3, #14
-
-        ldrh    w4, [x17, #-14]!
-        ldrh    w5, [x17, #2]
-        ldrh    w6, [x17, #4]
-        ldrh    w7, [x17, #6]
-        ldrh    w8, [x17, #8]
-        ldrh    w9, [x17, #10]
-        ldrh    w10, [x17, #12]
-
-        strh    w4, [x18, #-14]!
-        strh    w5, [x18, #2]
-        strh    w6, [x18, #4]
-        strh    w7, [x18, #6]
-        strh    w8, [x18, #8]
-        strh    w9, [x18, #10]
-        strh    w10, [x18, #12]
-
-        b.ge    cs_backward_loop
-        adds    x2, x3, #14
-        b.ne    cs_backward_lt14
-        ret
-
-
-        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
-        #                                                 void* to,
-        #                                                 size_t count)
-_Copy_arrayof_conjoint_jshorts:
-        hlt 1007
-
-
-        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
-        #                                               jlong* to,
-        #                                               size_t count)
-_Copy_conjoint_jlongs_atomic:
-_Copy_arrayof_conjoint_jlongs:
-        hlt 1009
-
-
-        # Support for void Copy::conjoint_jints_atomic(void* from,
-        #                                              void* to,
-        #                                              size_t count)
-_Copy_conjoint_jints_atomic:
-_Copy_arrayof_conjoint_jints:
-        # These and further memory prefetches may hit out of array ranges.
-        # Experiments showed that prefetching of inaccessible memory doesn't result in exceptions.
-        prfm    pldl1keep,  [x0, #0]
-        prfm    pstl1keep,  [x1, #0]
-        prfm    pldl1keep,  [x0, #32]
-        prfm    pstl1keep,  [x1, #32]
-
-        subs    x3, x1, x0
-        # hi condition is met <=> from < to
-        ccmp    x2, x3, #0, hi
-        # hi condition is met <=> (from < to) and (to - from < count)
-        b.hi    ci_backward
-
-        subs    x18, x2,  #64
-        b.ge    ci_forward_large
-
-ci_forward_lt_64:
-        # Copy [x0, x0 + x2) to [x1, x1 + x2)
-        
-        adr     x15,  ci_forward_tail_table_base
-        and     x16,  x2,  #~4
-
-        # Calculate address to jump and store it to x15:
-        #   Each pair of instructions before ci_forward_tail_table_base copies 8 bytes.
-        #   x16 is count of bytes to copy aligned down by 8.
-        #   So x16/8 pairs of instructions should be executed. 
-        #   Each pair takes 8 bytes, so x15 = ci_forward_tail_table_base - (x16/8)*8 = x15 - x16
-        sub     x15,  x15, x16
-        prfm    plil1keep, [x15]
-    
-        add     x17,  x0,  x2
-        add     x18,  x1,  x2
-
-        # If x2 = x16 + 4, then copy 4 bytes and x16 bytes after that.
-        # Otherwise x2 = x16, so proceed to copy x16 bytes.
-        tbz     x2, #2, ci_forward_lt_64_even
-        ldr     w3, [x0]
-        str     w3, [x1]
-ci_forward_lt_64_even:
-        # Copy [x17 - x16, x17) to [x18 - x16, x18)
-        # x16 is aligned by 8 and less than 64
-
-        # Execute (x16/8) ldp-stp pairs; each pair copies 8 bytes
-        br      x15
-
-        ldp     w3,  w4,  [x17, #-56]
-        stp     w3,  w4,  [x18, #-56]
-        ldp     w5,  w6,  [x17, #-48]
-        stp     w5,  w6,  [x18, #-48]
-        ldp     w7,  w8,  [x17, #-40]
-        stp     w7,  w8,  [x18, #-40]
-        ldp     w9,  w10, [x17, #-32]
-        stp     w9,  w10, [x18, #-32]
-        ldp     w11, w12, [x17, #-24]
-        stp     w11, w12, [x18, #-24]
-        ldp     w13, w14, [x17, #-16]
-        stp     w13, w14, [x18, #-16]
-        ldp     w15, w16, [x17, #-8]
-        stp     w15, w16, [x18, #-8]
-ci_forward_tail_table_base:
-        ret
-
-.p2align  6
-.rept   12
-        nop
-.endr
-ci_forward_large:
-        # x18 >= 0;
-        # Copy [x0, x0 + x18 + 64) to [x1, x1 + x18 + 64)
-
-        ldp     w3,  w4,  [x0], #32
-        ldp     w5,  w6,  [x0, #-24]
-        ldp     w7,  w8,  [x0, #-16]
-        ldp     w9,  w10, [x0, #-8]
-
-        # Before and after each iteration of loop registers w3-w10 contain [x0 - 32, x0),
-        # and x1 is a place to copy this data;
-        # x18 contains number of bytes to be stored minus 64
-
-        # Exactly 16 instructions from p2align, so ci_forward_loop starts from cache line boundary
-        # Checking it explictly by aligning with "hlt 1000" instructions 
-.p2alignl  6, 0xd4407d00
-ci_forward_loop:
-        prfm    pldl1keep,  [x0, #32]
-        prfm    pstl1keep,  [x1, #32]
-
-        subs    x18, x18, #32
-
-        stp     w3,  w4,  [x1, #0]
-        ldp     w3,  w4,  [x0, #0]
-        stp     w5,  w6,  [x1, #8]
-        ldp     w5,  w6,  [x0, #8]
-        stp     w7,  w8,  [x1, #16]
-        ldp     w7,  w8,  [x0, #16]
-        stp     w9,  w10, [x1, #24]
-        ldp     w9,  w10, [x0, #24]
-        
-        add     x1,  x1,  #32
-        add     x0,  x0,  #32
-
-        b.ge    ci_forward_loop
-
-        # 14 instructions from ci_forward_loop, so the loop body hits into one cache line
-
-ci_forward_loop_end:
-        adds    x2,  x18, #32
-
-        stp     w3,  w4,  [x1], #32
-        stp     w5,  w6,  [x1, #-24]
-        stp     w7,  w8,  [x1, #-16]
-        stp     w9,  w10, [x1, #-8]
-
-        # Increased x18 by 32, but stored 32 bytes, so x2 contains exact number of bytes to be stored
-
-        # If this number is not zero, also copy remaining bytes
-        b.ne    ci_forward_lt_64
-        ret
-
-ci_backward:
-
-        # Overlapping case should be the rare one, it does not worth optimizing
-
-        ands    x3,  x2,  #~4
-        # x3 is count aligned down by 2*jintSize
-        add     x0,  x0,  x2
-        add     x1,  x1,  x2
-        sub     x3,  x3,  #8
-        # Skip loop if 0 or 1 jints
-        b.eq    ci_backward_loop_end
-
-        # x3 >= 0
-        # Copy [x0 - x3 - 8, x0) to [x1 - x3 - 8, x1) backward
-ci_backward_loop:
-        subs    x3,  x3,  #8
-        ldp     w4,  w5,  [x0, #-8]!
-        stp     w4,  w5,  [x1, #-8]!
-        b.ge    ci_backward_loop
-
-ci_backward_loop_end:
-        # Copy remaining 0 or 1 jints
-        tbz     x2,  #2,  ci_backward_finish
-        ldr     w3, [x0, #-4]
-        str     w3, [x1, #-4]
-
-ci_backward_finish:
-        ret
diff --git a/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp
index 48adb948a64..8f672ce55b4 100644
--- a/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp
@@ -32,8 +32,7 @@
 
 // Implementation of class OrderAccess.
 // - we define the high level barriers below and use the general
-//   implementation in orderAccess.hpp, with customizations
-//   on AARCH64 via the specialized_* template functions
+//   implementation in orderAccess.hpp.
 
 // Memory Ordering on ARM is weak.
 //
@@ -61,9 +60,6 @@ inline static void dmb_sy() {
    if (!os::is_MP()) {
      return;
    }
-#ifdef AARCH64
-   __asm__ __volatile__ ("dmb sy" : : : "memory");
-#else
    if (VM_Version::arm_arch() >= 7) {
 #ifdef __thumb__
      __asm__ volatile (
@@ -78,16 +74,12 @@ inline static void dmb_sy() {
        "mcr p15, 0, %0, c7, c10, 5"
        : : "r" (zero) : "memory");
    }
-#endif
 }
 
 inline static void dmb_st() {
    if (!os::is_MP()) {
      return;
    }
-#ifdef AARCH64
-   __asm__ __volatile__ ("dmb st" : : : "memory");
-#else
    if (VM_Version::arm_arch() >= 7) {
 #ifdef __thumb__
      __asm__ volatile (
@@ -102,19 +94,11 @@ inline static void dmb_st() {
        "mcr p15, 0, %0, c7, c10, 5"
        : : "r" (zero) : "memory");
    }
-#endif
 }
 
 // Load-Load/Store barrier
 inline static void dmb_ld() {
-#ifdef AARCH64
-   if (!os::is_MP()) {
-     return;
-   }
-   __asm__ __volatile__ ("dmb ld" : : : "memory");
-#else
    dmb_sy();
-#endif
 }
 
 
@@ -126,123 +110,4 @@ inline void OrderAccess::storeload()  { dmb_sy(); }
 inline void OrderAccess::release()    { dmb_sy(); }
 inline void OrderAccess::fence()      { dmb_sy(); }
 
-// specializations for Aarch64
-// TODO-AARCH64: evaluate effectiveness of ldar*/stlr* implementations compared to 32-bit ARM approach
-
-#ifdef AARCH64
-
-template<>
-struct OrderAccess::PlatformOrderedLoad<1, X_ACQUIRE>
-{
-  template <typename T>
-  T operator()(const volatile T* p) const {
-    volatile T result;
-    __asm__ volatile(
-      "ldarb %w[res], [%[ptr]]"
-      : [res] "=&r" (result)
-      : [ptr] "r" (p)
-      : "memory");
-    return result;
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedLoad<2, X_ACQUIRE>
-{
-  template <typename T>
-  T operator()(const volatile T* p) const {
-    volatile T result;
-    __asm__ volatile(
-      "ldarh %w[res], [%[ptr]]"
-      : [res] "=&r" (result)
-      : [ptr] "r" (p)
-      : "memory");
-    return result;
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedLoad<4, X_ACQUIRE>
-{
-  template <typename T>
-  T operator()(const volatile T* p) const {
-    volatile T result;
-    __asm__ volatile(
-      "ldar %w[res], [%[ptr]]"
-      : [res] "=&r" (result)
-      : [ptr] "r" (p)
-      : "memory");
-    return result;
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedLoad<8, X_ACQUIRE>
-{
-  template <typename T>
-  T operator()(const volatile T* p) const {
-    volatile T result;
-    __asm__ volatile(
-      "ldar %[res], [%[ptr]]"
-      : [res] "=&r" (result)
-      : [ptr] "r" (p)
-      : "memory");
-    return result;
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile(
-      "stlrb %w[val], [%[ptr]]"
-      :
-      : [ptr] "r" (p), [val] "r" (v)
-      : "memory");
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile(
-      "stlrh %w[val], [%[ptr]]"
-      :
-      : [ptr] "r" (p), [val] "r" (v)
-      : "memory");
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile(
-      "stlr %w[val], [%[ptr]]"
-      :
-      : [ptr] "r" (p), [val] "r" (v)
-      : "memory");
-  }
-};
-
-template<>
-struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
-{
-  template <typename T>
-  void operator()(T v, volatile T* p) const {
-    __asm__ volatile(
-      "stlr %[val], [%[ptr]]"
-      :
-      : [ptr] "r" (p), [val] "r" (v)
-      : "memory");
-  }
-};
-
-#endif // AARCH64
-
 #endif // OS_CPU_LINUX_ARM_VM_ORDERACCESS_LINUX_ARM_HPP
diff --git a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp
index 8f54f0748db..3d6214e2512 100644
--- a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp
+++ b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp
@@ -78,7 +78,7 @@
 
 // Don't #define SPELL_REG_FP for thumb because it is not safe to use, so this makes sure we never fetch it.
 #ifndef __thumb__
-#define SPELL_REG_FP  AARCH64_ONLY("x29") NOT_AARCH64("fp")
+#define SPELL_REG_FP "fp"
 #endif
 
 address os::current_stack_pointer() {
@@ -91,15 +91,6 @@ char* os::non_memory_address_word() {
   return (char*) -1;
 }
 
-#ifdef AARCH64
-
-#define arm_pc pc
-#define arm_sp sp
-#define arm_fp regs[29]
-#define arm_r0 regs[0]
-#define ARM_REGS_IN_CONTEXT  31
-
-#else
 
 #if NGREG == 16
 // These definitions are based on the observation that until
@@ -115,7 +106,6 @@ char* os::non_memory_address_word() {
 
 #define ARM_REGS_IN_CONTEXT  16
 
-#endif // AARCH64
 
 address os::Linux::ucontext_get_pc(const ucontext_t* uc) {
   return (address)uc->uc_mcontext.arm_pc;
@@ -256,7 +246,6 @@ frame os::current_frame() {
 #endif
 }
 
-#ifndef AARCH64
 extern "C" address check_vfp_fault_instr;
 extern "C" address check_vfp3_32_fault_instr;
 extern "C" address check_mp_ext_fault_instr;
@@ -264,7 +253,6 @@ extern "C" address check_mp_ext_fault_instr;
 address check_vfp_fault_instr = NULL;
 address check_vfp3_32_fault_instr = NULL;
 address check_mp_ext_fault_instr = NULL;
-#endif // !AARCH64
 extern "C" address check_simd_fault_instr;
 address check_simd_fault_instr = NULL;
 
@@ -284,9 +272,9 @@ extern "C" int JVM_handle_linux_signal(int sig, siginfo_t* info,
 
   if (sig == SIGILL &&
       ((info->si_addr == (caddr_t)check_simd_fault_instr)
-       NOT_AARCH64(|| info->si_addr == (caddr_t)check_vfp_fault_instr)
-       NOT_AARCH64(|| info->si_addr == (caddr_t)check_vfp3_32_fault_instr)
-       NOT_AARCH64(|| info->si_addr == (caddr_t)check_mp_ext_fault_instr))) {
+       || info->si_addr == (caddr_t)check_vfp_fault_instr
+       || info->si_addr == (caddr_t)check_vfp3_32_fault_instr
+       || info->si_addr == (caddr_t)check_mp_ext_fault_instr)) {
     // skip faulty instruction + instruction that sets return value to
     // success and set return value to failure.
     os::Linux::ucontext_set_pc(uc, (address)info->si_addr + 8);
@@ -517,9 +505,6 @@ void os::Linux::set_fpu_control_word(int fpu_control) {
 }
 
 void os::setup_fpu() {
-#ifdef AARCH64
-  __asm__ volatile ("msr fpcr, xzr");
-#else
 #if !defined(__SOFTFP__) && defined(__VFP_FP__)
   // Turn on IEEE-754 compliant VFP mode
   __asm__ volatile (
@@ -528,7 +513,6 @@ void os::setup_fpu() {
     : /* no output */ : /* no input */ : "r0"
   );
 #endif
-#endif // AARCH64
 }
 
 bool os::is_allocatable(size_t bytes) {
@@ -564,14 +548,8 @@ void os::print_context(outputStream *st, const void *context) {
     st->print_cr("  %-3s = " INTPTR_FORMAT, as_Register(r)->name(), reg_area[r]);
   }
 #define U64_FORMAT "0x%016llx"
-#ifdef AARCH64
-  st->print_cr("  %-3s = " U64_FORMAT, "sp", uc->uc_mcontext.sp);
-  st->print_cr("  %-3s = " U64_FORMAT, "pc", uc->uc_mcontext.pc);
-  st->print_cr("  %-3s = " U64_FORMAT, "pstate", uc->uc_mcontext.pstate);
-#else
   // now print flag register
   st->print_cr("  %-4s = 0x%08lx", "cpsr",uc->uc_mcontext.arm_cpsr);
-#endif
   st->cr();
 
   intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc);
@@ -600,16 +578,10 @@ void os::print_register_info(outputStream *st, const void *context) {
     print_location(st, reg_area[r]);
     st->cr();
   }
-#ifdef AARCH64
-  st->print_cr("  %-3s = " U64_FORMAT, "pc", uc->uc_mcontext.pc);
-  print_location(st, uc->uc_mcontext.pc);
-  st->cr();
-#endif
   st->cr();
 }
 
 
-#ifndef AARCH64
 
 typedef int64_t cmpxchg_long_func_t(int64_t, int64_t, volatile int64_t*);
 
@@ -719,7 +691,6 @@ int32_t os::atomic_cmpxchg_bootstrap(int32_t compare_value, int32_t exchange_val
   return old_value;
 }
 
-#endif // !AARCH64
 
 #ifndef PRODUCT
 void os::verify_stack_alignment() {
diff --git a/src/hotspot/os_cpu/linux_arm/os_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/os_linux_arm.hpp
index 2ed55a72a49..a2b6e4a43bf 100644
--- a/src/hotspot/os_cpu/linux_arm/os_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/os_linux_arm.hpp
@@ -28,11 +28,7 @@
 #ifndef __thumb__
   enum {
     // Offset to add to frame::_fp when dealing with non-thumb C frames
-#ifdef AARCH64
-    C_frame_offset =  0,
-#else
     C_frame_offset =  -1,
-#endif
   };
 #endif
 
@@ -44,7 +40,6 @@
   // Note: Currently only used in 64 bit Windows implementations
   static bool register_code_area(char *low, char *high) { return true; }
 
-#ifndef AARCH64
   static int64_t (*atomic_cmpxchg_long_func)(int64_t compare_value,
                                              int64_t exchange_value,
                                              volatile int64_t *dest);
@@ -74,6 +69,5 @@
   static int32_t  atomic_cmpxchg_bootstrap(int32_t compare_value,
                                            int32_t exchange_value,
                                            volatile int32_t *dest);
-#endif // !AARCH64
 
 #endif // OS_CPU_LINUX_ARM_VM_OS_LINUX_ARM_HPP
diff --git a/src/hotspot/os_cpu/linux_arm/prefetch_linux_arm.inline.hpp b/src/hotspot/os_cpu/linux_arm/prefetch_linux_arm.inline.hpp
index 9f2cacd2c4c..5fa93c92024 100644
--- a/src/hotspot/os_cpu/linux_arm/prefetch_linux_arm.inline.hpp
+++ b/src/hotspot/os_cpu/linux_arm/prefetch_linux_arm.inline.hpp
@@ -28,21 +28,13 @@
 #include "runtime/prefetch.hpp"
 
 inline void Prefetch::read (void *loc, intx interval) {
-#ifdef AARCH64
-  __asm__ volatile ("prfm PLDL1KEEP, [%0]" : : "r" (loc));
-#else
 #if defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_5TE__)
   __asm__ volatile ("pld [%0]" : : "r" (loc));
 #endif
-#endif // AARCH64
 }
 
 inline void Prefetch::write(void *loc, intx interval) {
-#ifdef AARCH64
-  __asm__ volatile ("prfm PSTL1KEEP, [%0]" : : "r" (loc));
-#else
   // Not available on 32-bit ARM (prior to ARMv7 with MP extensions)
-#endif // AARCH64
 }
 
 #endif // OS_CPU_LINUX_ARM_VM_PREFETCH_LINUX_ARM_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_arm/thread_linux_arm.cpp b/src/hotspot/os_cpu/linux_arm/thread_linux_arm.cpp
index e4461404947..66cc7637519 100644
--- a/src/hotspot/os_cpu/linux_arm/thread_linux_arm.cpp
+++ b/src/hotspot/os_cpu/linux_arm/thread_linux_arm.cpp
@@ -32,17 +32,12 @@
 
 frame JavaThread::pd_last_frame() {
   assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
-#ifdef AARCH64
-  assert (_anchor.last_Java_pc() != NULL, "pc should be stored");
-  return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc());
-#else
   if (_anchor.last_Java_pc() != NULL) {
     return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc());
   } else {
     // This will pick up pc from sp
     return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp());
   }
-#endif // AARCH64
 }
 
 void JavaThread::cache_global_variables() {
@@ -84,7 +79,7 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava)
 
   // If we have a last_Java_frame, then we should use it even if
   // isInJava == true.  It should be more reliable than ucontext info.
-  if (jt->has_last_Java_frame() AARCH64_ONLY(&& jt->last_Java_pc() != NULL)) {
+  if (jt->has_last_Java_frame()) {
     *fr_addr = jt->pd_last_frame();
     return true;
   }
diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp
index 6605ab367c7..49f3eb4a72c 100644
--- a/src/hotspot/share/utilities/macros.hpp
+++ b/src/hotspot/share/utilities/macros.hpp
@@ -571,10 +571,9 @@
 #define NOT_E500V2(code) code
 #endif
 
-// Note: There are three ARM ports. They set the following in the makefiles:
-// 1. Closed 32-bit port:   -DARM -DARM32           -DTARGET_ARCH_arm
-// 2. Closed 64-bit port:   -DARM -DAARCH64 -D_LP64 -DTARGET_ARCH_arm
-// 3. Open   64-bit port:         -DAARCH64 -D_LP64 -DTARGET_ARCH_aaarch64
+// Note: There are two ARM ports. They set the following in the makefiles:
+// 1. 32-bit port:   -DARM -DARM32 -DTARGET_ARCH_arm
+// 2. 64-bit port:   -DAARCH64 -D_LP64 -DTARGET_ARCH_aarch64
 #ifdef ARM
 #define ARM_ONLY(code) code
 #define NOT_ARM(code)
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java
index 7d7a6107cab..cea3f6504ef 100644
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java
@@ -72,17 +72,6 @@ public static boolean knownCPU(String cpu) {
   public static String getCPU() throws UnsupportedPlatformException {
     String cpu = System.getProperty("os.arch");
 
-    // Let any additional CPU mangling fire first
-    try {
-      Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed");
-      AltPlatformInfo api = (AltPlatformInfo) pic.newInstance();
-      if (api.knownCPU(cpu)) {
-        return api.getCPU(cpu);
-      }
-    } catch (Exception e) {
-       // Ignored
-    }
-
     // Check that CPU is supported
     if (!knownCPU(cpu)) {
        throw new UnsupportedPlatformException("CPU type " + cpu + " not yet supported");
diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java
index 127bb6abcd9..e595fb80729 100644
--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java
+++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java
@@ -232,22 +232,6 @@ void recursiveCall() {
         }
     }
 
-    private static boolean isAlwaysSupportedPlatform() {
-        // Note: To date Aarch64 is the only platform that we don't statically
-        // know if it supports the reserved stack area. This is because the
-        // open Aarch64 port supports it and the Oracle arm64 port does not.
-        return Platform.isAix() ||
-            (Platform.isLinux() &&
-             (Platform.isPPC() || Platform.isS390x() || Platform.isX64() ||
-              Platform.isX86())) ||
-            Platform.isOSX() ||
-            Platform.isSolaris();
-    }
-
-    private static boolean isNeverSupportedPlatform() {
-        return !isAlwaysSupportedPlatform() && !Platform.isAArch64();
-    }
-
     private static boolean isSupportedPlatform;
 
     private static void initIsSupportedPlatform() throws Exception {
@@ -272,19 +256,11 @@ private static void initIsSupportedPlatform() throws Exception {
 
         // Do a sanity check. Some platforms we know are always supported. Make sure
         // we didn't determine that one of those platforms is not supported.
-        if (!isSupportedPlatform && isAlwaysSupportedPlatform()) {
+        if (!isSupportedPlatform) {
             String msg  = "This platform should be supported: " + Platform.getOsArch();
             System.err.println("FAILED: " +  msg);
             throw new RuntimeException(msg);
         }
-
-        // And some platforms we know are never supported. Make sure
-        // we didn't determine that one of those platforms is supported.
-        if (isSupportedPlatform && isNeverSupportedPlatform()) {
-            String msg  = "This platform should not be supported: " + Platform.getOsArch();
-            System.err.println("FAILED: " +  msg);
-            throw new RuntimeException(msg);
-        }
     }
 
     public static void main(String[] args) throws Exception {