Inline Float32x4 min, max, sqrt, reciprocal, reciprocalSqrt, and scale

R=srdjan@google.com

Review URL: https://codereview.chromium.org//14432004

git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart@22185 260f80e4-7a28-3924-810f-c04153c831b5
diff --git a/runtime/vm/flow_graph_optimizer.cc b/runtime/vm/flow_graph_optimizer.cc
index c7fabe7..555b859 100644
--- a/runtime/vm/flow_graph_optimizer.cc
+++ b/runtime/vm/flow_graph_optimizer.cc
@@ -1761,6 +1761,57 @@
         ReplaceCall(call, cmp);
         return true;
       }
+      case MethodRecognizer::kFloat32x4Min:
+      case MethodRecognizer::kFloat32x4Max: {
+        Definition* left = call->ArgumentAt(0);
+        Definition* right = call->ArgumentAt(1);
+        // Type check left.
+        AddCheckClass(left,
+                      ICData::ZoneHandle(
+                          call->ic_data()->AsUnaryClassChecksForArgNr(0)),
+                      call->deopt_id(),
+                      call->env(),
+                      call);
+        Float32x4MinMaxInstr* minmax =
+            new Float32x4MinMaxInstr(recognized_kind, new Value(left),
+                                     new Value(right), call);
+        ReplaceCall(call, minmax);
+        return true;
+      }
+      case MethodRecognizer::kFloat32x4Scale: {
+        Definition* left = call->ArgumentAt(0);
+        Definition* right = call->ArgumentAt(1);
+        // Type check left.
+        AddCheckClass(left,
+                      ICData::ZoneHandle(
+                          call->ic_data()->AsUnaryClassChecksForArgNr(0)),
+                      call->deopt_id(),
+                      call->env(),
+                      call);
+        // The operands are handed to the instruction in swapped order so
+        // that the double value is loaded into the output register, where
+        // it can be destroyed.
+        Float32x4ScaleInstr* scale =
+            new Float32x4ScaleInstr(recognized_kind, new Value(right),
+                                    new Value(left), call);
+        ReplaceCall(call, scale);
+        return true;
+      }
+      case MethodRecognizer::kFloat32x4Sqrt:
+      case MethodRecognizer::kFloat32x4ReciprocalSqrt:
+      case MethodRecognizer::kFloat32x4Reciprocal: {
+        Definition* left = call->ArgumentAt(0);
+        AddCheckClass(left,
+                      ICData::ZoneHandle(
+                          call->ic_data()->AsUnaryClassChecksForArgNr(0)),
+                      call->deopt_id(),
+                      call->env(),
+                      call);
+        Float32x4SqrtInstr* sqrt =
+            new Float32x4SqrtInstr(recognized_kind, new Value(left), call);
+        ReplaceCall(call, sqrt);
+        return true;
+      }
       default:
         return false;
     }
@@ -4871,6 +4922,21 @@
 }
 
 
+void ConstantPropagator::VisitFloat32x4MinMax(Float32x4MinMaxInstr* instr) {
+  SetValue(instr, non_constant_);
+}
+
+
+void ConstantPropagator::VisitFloat32x4Scale(Float32x4ScaleInstr* instr) {
+  SetValue(instr, non_constant_);
+}
+
+
+void ConstantPropagator::VisitFloat32x4Sqrt(Float32x4SqrtInstr* instr) {
+  SetValue(instr, non_constant_);
+}
+
+
 void ConstantPropagator::VisitMathSqrt(MathSqrtInstr* instr) {
   const Object& value = instr->value()->definition()->constant_value();
   if (IsNonConstant(value)) {
diff --git a/runtime/vm/flow_graph_type_propagator.cc b/runtime/vm/flow_graph_type_propagator.cc
index 2d3d4de..253697a 100644
--- a/runtime/vm/flow_graph_type_propagator.cc
+++ b/runtime/vm/flow_graph_type_propagator.cc
@@ -996,6 +996,21 @@
 }
 
 
+CompileType Float32x4MinMaxInstr::ComputeType() const {
+  return CompileType::FromCid(kFloat32x4Cid);
+}
+
+
+CompileType Float32x4ScaleInstr::ComputeType() const {
+  return CompileType::FromCid(kFloat32x4Cid);
+}
+
+
+CompileType Float32x4SqrtInstr::ComputeType() const {
+  return CompileType::FromCid(kFloat32x4Cid);
+}
+
+
 CompileType MathSqrtInstr::ComputeType() const {
   return CompileType::FromCid(kDoubleCid);
 }
diff --git a/runtime/vm/il_printer.cc b/runtime/vm/il_printer.cc
index 1808509..bf50596 100644
--- a/runtime/vm/il_printer.cc
+++ b/runtime/vm/il_printer.cc
@@ -642,6 +642,28 @@
 }
 
 
+void Float32x4MinMaxInstr::PrintOperandsTo(BufferFormatter* f) const {
+  f->Print("%s, ", MethodRecognizer::KindToCString(op_kind()));
+  left()->PrintTo(f);
+  f->Print(", ");
+  right()->PrintTo(f);
+}
+
+
+void Float32x4SqrtInstr::PrintOperandsTo(BufferFormatter* f) const {
+  f->Print("%s, ", MethodRecognizer::KindToCString(op_kind()));
+  left()->PrintTo(f);
+}
+
+
+void Float32x4ScaleInstr::PrintOperandsTo(BufferFormatter* f) const {
+  f->Print("%s, ", MethodRecognizer::KindToCString(op_kind()));
+  left()->PrintTo(f);
+  f->Print(", ");
+  right()->PrintTo(f);
+}
+
+
 void BinaryMintOpInstr::PrintOperandsTo(BufferFormatter* f) const {
   f->Print("%s, ", Token::Str(op_kind()));
   left()->PrintTo(f);
diff --git a/runtime/vm/intermediate_language.h b/runtime/vm/intermediate_language.h
index f0affce..6b560f6 100644
--- a/runtime/vm/intermediate_language.h
+++ b/runtime/vm/intermediate_language.h
@@ -88,6 +88,12 @@
   V(_Float32x4, _cmplt, Float32x4LessThan, 548944488)                          \
   V(_Float32x4, _cmplte, Float32x4LessThanOrEqual, 548944488)                  \
   V(_Float32x4, _cmpnequal, Float32x4NotEqual, 548944488)                      \
+  V(_Float32x4, _min, Float32x4Min, 342800599)                                 \
+  V(_Float32x4, _max, Float32x4Max, 342800599)                                 \
+  V(_Float32x4, _scale, Float32x4Scale, 219466242)                             \
+  V(_Float32x4, _sqrt, Float32x4Sqrt, 42621627)                                \
+  V(_Float32x4, _reciprocalSqrt, Float32x4ReciprocalSqrt, 42621627)            \
+  V(_Float32x4, _reciprocal, Float32x4Reciprocal, 42621627)                    \
 
 // Class that recognizes the name and owner of a function and returns the
 // corresponding enum. See RECOGNIZED_LIST above for list of recognizable
@@ -562,6 +568,9 @@
   M(Float32x4Zero)                                                             \
   M(Float32x4Splat)                                                            \
   M(Float32x4Comparison)                                                       \
+  M(Float32x4MinMax)                                                           \
+  M(Float32x4Scale)                                                            \
+  M(Float32x4Sqrt)                                                             \
 
 
 #define FORWARD_DECLARATION(type) class type##Instr;
@@ -823,6 +832,9 @@
   friend class Float32x4ShuffleInstr;
   friend class Float32x4ConstructorInstr;
   friend class Float32x4ComparisonInstr;
+  friend class Float32x4MinMaxInstr;
+  friend class Float32x4ScaleInstr;
+  friend class Float32x4SqrtInstr;
   friend class BinaryMintOpInstr;
   friend class BinarySmiOpInstr;
   friend class UnarySmiOpInstr;
@@ -4476,7 +4488,7 @@
  public:
   Float32x4ComparisonInstr(MethodRecognizer::Kind op_kind, Value* left,
                            Value* right, InstanceCallInstr* instance_call)
-        : op_kind_(op_kind) {
+      : op_kind_(op_kind) {
     SetInputAt(0, left);
     SetInputAt(1, right);
     deopt_id_ = instance_call->deopt_id();
@@ -4523,6 +4535,159 @@
 };
 
 
+class Float32x4MinMaxInstr : public TemplateDefinition<2> {
+ public:
+  Float32x4MinMaxInstr(MethodRecognizer::Kind op_kind, Value* left,
+                       Value* right, InstanceCallInstr* instance_call)
+      : op_kind_(op_kind) {
+    SetInputAt(0, left);
+    SetInputAt(1, right);
+    deopt_id_ = instance_call->deopt_id();
+  }
+
+  Value* left() const { return inputs_[0]; }
+  Value* right() const { return inputs_[1]; }
+
+  MethodRecognizer::Kind op_kind() const { return op_kind_; }
+
+  virtual void PrintOperandsTo(BufferFormatter* f) const;
+
+  virtual bool CanDeoptimize() const { return false; }
+
+  virtual Representation representation() const {
+    return kUnboxedFloat32x4;
+  }
+
+  virtual Representation RequiredInputRepresentation(intptr_t idx) const {
+    ASSERT((idx == 0) || (idx == 1));
+    return kUnboxedFloat32x4;
+  }
+
+  virtual intptr_t DeoptimizationTarget() const {
+    // Direct access since this instruction cannot deoptimize, and the deopt-id
+    // was inherited from another instruction that could deoptimize.
+    return deopt_id_;
+  }
+
+  DECLARE_INSTRUCTION(Float32x4MinMax)
+  virtual CompileType ComputeType() const;
+
+  virtual bool AllowsCSE() const { return true; }
+  virtual EffectSet Effects() const { return EffectSet::None(); }
+  virtual EffectSet Dependencies() const { return EffectSet::None(); }
+  virtual bool AttributesEqual(Instruction* other) const {
+    return op_kind() == other->AsFloat32x4MinMax()->op_kind();
+  }
+
+ private:
+  const MethodRecognizer::Kind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(Float32x4MinMaxInstr);
+};
+
+
+class Float32x4ScaleInstr : public TemplateDefinition<2> {
+ public:
+  Float32x4ScaleInstr(MethodRecognizer::Kind op_kind, Value* left,
+                      Value* right, InstanceCallInstr* instance_call)
+      : op_kind_(op_kind) {
+    SetInputAt(0, left);
+    SetInputAt(1, right);
+    deopt_id_ = instance_call->deopt_id();
+  }
+
+  Value* left() const { return inputs_[0]; }
+  Value* right() const { return inputs_[1]; }
+
+  MethodRecognizer::Kind op_kind() const { return op_kind_; }
+
+  virtual void PrintOperandsTo(BufferFormatter* f) const;
+
+  virtual bool CanDeoptimize() const { return false; }
+
+  virtual Representation representation() const {
+    return kUnboxedFloat32x4;
+  }
+
+  virtual Representation RequiredInputRepresentation(intptr_t idx) const {
+    ASSERT((idx == 0) || (idx == 1));
+    if (idx == 0) {
+      return kUnboxedDouble;
+    }
+    return kUnboxedFloat32x4;
+  }
+
+  virtual intptr_t DeoptimizationTarget() const {
+    // Direct access since this instruction cannot deoptimize, and the deopt-id
+    // was inherited from another instruction that could deoptimize.
+    return deopt_id_;
+  }
+
+  DECLARE_INSTRUCTION(Float32x4Scale)
+  virtual CompileType ComputeType() const;
+
+  virtual bool AllowsCSE() const { return true; }
+  virtual EffectSet Effects() const { return EffectSet::None(); }
+  virtual EffectSet Dependencies() const { return EffectSet::None(); }
+  virtual bool AttributesEqual(Instruction* other) const {
+    return op_kind() == other->AsFloat32x4Scale()->op_kind();
+  }
+
+ private:
+  const MethodRecognizer::Kind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(Float32x4ScaleInstr);
+};
+
+
+class Float32x4SqrtInstr : public TemplateDefinition<1> {
+ public:
+  Float32x4SqrtInstr(MethodRecognizer::Kind op_kind, Value* left,
+                     InstanceCallInstr* instance_call) : op_kind_(op_kind) {
+    SetInputAt(0, left);
+    deopt_id_ = instance_call->deopt_id();
+  }
+
+  Value* left() const { return inputs_[0]; }
+
+  MethodRecognizer::Kind op_kind() const { return op_kind_; }
+
+  virtual void PrintOperandsTo(BufferFormatter* f) const;
+
+  virtual bool CanDeoptimize() const { return false; }
+
+  virtual Representation representation() const {
+    return kUnboxedFloat32x4;
+  }
+
+  virtual Representation RequiredInputRepresentation(intptr_t idx) const {
+    ASSERT(idx == 0);
+    return kUnboxedFloat32x4;
+  }
+
+  virtual intptr_t DeoptimizationTarget() const {
+    // Direct access since this instruction cannot deoptimize, and the deopt-id
+    // was inherited from another instruction that could deoptimize.
+    return deopt_id_;
+  }
+
+  DECLARE_INSTRUCTION(Float32x4Sqrt)
+  virtual CompileType ComputeType() const;
+
+  virtual bool AllowsCSE() const { return true; }
+  virtual EffectSet Effects() const { return EffectSet::None(); }
+  virtual EffectSet Dependencies() const { return EffectSet::None(); }
+  virtual bool AttributesEqual(Instruction* other) const {
+    return op_kind() == other->AsFloat32x4Sqrt()->op_kind();
+  }
+
+ private:
+  const MethodRecognizer::Kind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(Float32x4SqrtInstr);
+};
+
+
 class BinaryMintOpInstr : public TemplateDefinition<2> {
  public:
   BinaryMintOpInstr(Token::Kind op_kind,
diff --git a/runtime/vm/intermediate_language_arm.cc b/runtime/vm/intermediate_language_arm.cc
index c54b687..60f959f 100644
--- a/runtime/vm/intermediate_language_arm.cc
+++ b/runtime/vm/intermediate_language_arm.cc
@@ -2052,6 +2052,39 @@
 }
 
 
+LocationSummary* Float32x4MinMaxInstr::MakeLocationSummary() const {
+  UNIMPLEMENTED();
+  return NULL;
+}
+
+
+void Float32x4MinMaxInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNIMPLEMENTED();
+}
+
+
+LocationSummary* Float32x4SqrtInstr::MakeLocationSummary() const {
+  UNIMPLEMENTED();
+  return NULL;
+}
+
+
+void Float32x4SqrtInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNIMPLEMENTED();
+}
+
+
+LocationSummary* Float32x4ScaleInstr::MakeLocationSummary() const {
+  UNIMPLEMENTED();
+  return NULL;
+}
+
+
+void Float32x4ScaleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNIMPLEMENTED();
+}
+
+
 LocationSummary* MathSqrtInstr::MakeLocationSummary() const {
   UNIMPLEMENTED();
   return NULL;
diff --git a/runtime/vm/intermediate_language_ia32.cc b/runtime/vm/intermediate_language_ia32.cc
index bd279d3..b085eed 100644
--- a/runtime/vm/intermediate_language_ia32.cc
+++ b/runtime/vm/intermediate_language_ia32.cc
@@ -3076,6 +3076,96 @@
 }
 
 
+LocationSummary* Float32x4MinMaxInstr::MakeLocationSummary() const {
+  const intptr_t kNumInputs = 2;
+  const intptr_t kNumTemps = 0;
+  LocationSummary* summary =
+      new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
+  summary->set_in(0, Location::RequiresFpuRegister());
+  summary->set_in(1, Location::RequiresFpuRegister());
+  summary->set_out(Location::SameAsFirstInput());
+  return summary;
+}
+
+
+void Float32x4MinMaxInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  XmmRegister left = locs()->in(0).fpu_reg();
+  XmmRegister right = locs()->in(1).fpu_reg();
+
+  ASSERT(locs()->out().fpu_reg() == left);
+
+  switch (op_kind()) {
+    case MethodRecognizer::kFloat32x4Min:
+      __ minps(left, right);
+      break;
+    case MethodRecognizer::kFloat32x4Max:
+      __ maxps(left, right);
+      break;
+    default: UNREACHABLE();
+  }
+}
+
+
+LocationSummary* Float32x4ScaleInstr::MakeLocationSummary() const {
+  const intptr_t kNumInputs = 2;
+  const intptr_t kNumTemps = 0;
+  LocationSummary* summary =
+      new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
+  summary->set_in(0, Location::RequiresFpuRegister());
+  summary->set_in(1, Location::RequiresFpuRegister());
+  summary->set_out(Location::SameAsFirstInput());
+  return summary;
+}
+
+
+void Float32x4ScaleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  XmmRegister left = locs()->in(0).fpu_reg();
+  XmmRegister right = locs()->in(1).fpu_reg();
+
+  ASSERT(locs()->out().fpu_reg() == left);
+
+  switch (op_kind()) {
+    case MethodRecognizer::kFloat32x4Scale:
+      __ cvtsd2ss(left, left);
+      __ shufps(left, left, Immediate(0x00));
+      __ mulps(left, right);
+      break;
+    default: UNREACHABLE();
+  }
+}
+
+
+LocationSummary* Float32x4SqrtInstr::MakeLocationSummary() const {
+  const intptr_t kNumInputs = 1;
+  const intptr_t kNumTemps = 0;
+  LocationSummary* summary =
+      new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
+  summary->set_in(0, Location::RequiresFpuRegister());
+  summary->set_out(Location::SameAsFirstInput());
+  return summary;
+}
+
+
+void Float32x4SqrtInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  XmmRegister left = locs()->in(0).fpu_reg();
+
+  ASSERT(locs()->out().fpu_reg() == left);
+
+  switch (op_kind()) {
+    case MethodRecognizer::kFloat32x4Sqrt:
+      __ sqrtps(left);
+      break;
+    case MethodRecognizer::kFloat32x4Reciprocal:
+      __ reciprocalps(left);
+      break;
+    case MethodRecognizer::kFloat32x4ReciprocalSqrt:
+      __ rsqrtps(left);
+      break;
+    default: UNREACHABLE();
+  }
+}
+
+
 LocationSummary* MathSqrtInstr::MakeLocationSummary() const {
   const intptr_t kNumInputs = 1;
   const intptr_t kNumTemps = 0;
diff --git a/runtime/vm/intermediate_language_mips.cc b/runtime/vm/intermediate_language_mips.cc
index cd4d0d2..21ba210 100644
--- a/runtime/vm/intermediate_language_mips.cc
+++ b/runtime/vm/intermediate_language_mips.cc
@@ -2107,6 +2107,39 @@
 }
 
 
+LocationSummary* Float32x4MinMaxInstr::MakeLocationSummary() const {
+  UNIMPLEMENTED();
+  return NULL;
+}
+
+
+void Float32x4MinMaxInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNIMPLEMENTED();
+}
+
+
+LocationSummary* Float32x4SqrtInstr::MakeLocationSummary() const {
+  UNIMPLEMENTED();
+  return NULL;
+}
+
+
+void Float32x4SqrtInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNIMPLEMENTED();
+}
+
+
+LocationSummary* Float32x4ScaleInstr::MakeLocationSummary() const {
+  UNIMPLEMENTED();
+  return NULL;
+}
+
+
+void Float32x4ScaleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  UNIMPLEMENTED();
+}
+
+
 LocationSummary* MathSqrtInstr::MakeLocationSummary() const {
   UNIMPLEMENTED();
   return NULL;
diff --git a/runtime/vm/intermediate_language_x64.cc b/runtime/vm/intermediate_language_x64.cc
index f899f3e..ac9d6f8 100644
--- a/runtime/vm/intermediate_language_x64.cc
+++ b/runtime/vm/intermediate_language_x64.cc
@@ -3083,6 +3083,96 @@
 }
 
 
+LocationSummary* Float32x4MinMaxInstr::MakeLocationSummary() const {
+  const intptr_t kNumInputs = 2;
+  const intptr_t kNumTemps = 0;
+  LocationSummary* summary =
+      new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
+  summary->set_in(0, Location::RequiresFpuRegister());
+  summary->set_in(1, Location::RequiresFpuRegister());
+  summary->set_out(Location::SameAsFirstInput());
+  return summary;
+}
+
+
+void Float32x4MinMaxInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  XmmRegister left = locs()->in(0).fpu_reg();
+  XmmRegister right = locs()->in(1).fpu_reg();
+
+  ASSERT(locs()->out().fpu_reg() == left);
+
+  switch (op_kind()) {
+    case MethodRecognizer::kFloat32x4Min:
+      __ minps(left, right);
+      break;
+    case MethodRecognizer::kFloat32x4Max:
+      __ maxps(left, right);
+      break;
+    default: UNREACHABLE();
+  }
+}
+
+
+LocationSummary* Float32x4ScaleInstr::MakeLocationSummary() const {
+  const intptr_t kNumInputs = 2;
+  const intptr_t kNumTemps = 0;
+  LocationSummary* summary =
+      new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
+  summary->set_in(0, Location::RequiresFpuRegister());
+  summary->set_in(1, Location::RequiresFpuRegister());
+  summary->set_out(Location::SameAsFirstInput());
+  return summary;
+}
+
+
+void Float32x4ScaleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  XmmRegister left = locs()->in(0).fpu_reg();
+  XmmRegister right = locs()->in(1).fpu_reg();
+
+  ASSERT(locs()->out().fpu_reg() == left);
+
+  switch (op_kind()) {
+    case MethodRecognizer::kFloat32x4Scale:
+      __ cvtsd2ss(left, left);
+      __ shufps(left, left, Immediate(0x00));
+      __ mulps(left, right);
+      break;
+    default: UNREACHABLE();
+  }
+}
+
+
+LocationSummary* Float32x4SqrtInstr::MakeLocationSummary() const {
+  const intptr_t kNumInputs = 1;
+  const intptr_t kNumTemps = 0;
+  LocationSummary* summary =
+      new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
+  summary->set_in(0, Location::RequiresFpuRegister());
+  summary->set_out(Location::SameAsFirstInput());
+  return summary;
+}
+
+
+void Float32x4SqrtInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
+  XmmRegister left = locs()->in(0).fpu_reg();
+
+  ASSERT(locs()->out().fpu_reg() == left);
+
+  switch (op_kind()) {
+    case MethodRecognizer::kFloat32x4Sqrt:
+      __ sqrtps(left);
+      break;
+    case MethodRecognizer::kFloat32x4Reciprocal:
+      __ reciprocalps(left);
+      break;
+    case MethodRecognizer::kFloat32x4ReciprocalSqrt:
+      __ rsqrtps(left);
+      break;
+    default: UNREACHABLE();
+  }
+}
+
+
 LocationSummary* MathSqrtInstr::MakeLocationSummary() const {
   const intptr_t kNumInputs = 1;
   const intptr_t kNumTemps = 0;
diff --git a/tests/lib/typed_data/float32x4_test.dart b/tests/lib/typed_data/float32x4_test.dart
index 7f6aa2f..4d2c15e 100644
--- a/tests/lib/typed_data/float32x4_test.dart
+++ b/tests/lib/typed_data/float32x4_test.dart
@@ -183,19 +183,19 @@
 testReciprocal() {
   var m = new Float32x4(1.0, 4.0, 9.0, 16.0);
   m = m.reciprocal();
-  Expect.approxEquals(1.0, m.x);
-  Expect.approxEquals(0.25, m.y);
-  Expect.approxEquals(0.1111111, m.z);
-  Expect.approxEquals(0.0625, m.w);
+  Expect.approxEquals(1.0, m.x, 0.001);
+  Expect.approxEquals(0.25, m.y, 0.001);
+  Expect.approxEquals(0.1111111, m.z, 0.001);
+  Expect.approxEquals(0.0625, m.w, 0.001);
 }
 
 testReciprocalSqrt() {
   var m = new Float32x4(1.0, 0.25, 0.111111, 0.0625);
   m = m.reciprocalSqrt();
-  Expect.approxEquals(1.0, m.x);
-  Expect.approxEquals(2.0, m.y);
-  Expect.approxEquals(3.0, m.z);
-  Expect.approxEquals(4.0, m.w);
+  Expect.approxEquals(1.0, m.x, 0.001);
+  Expect.approxEquals(2.0, m.y, 0.001);
+  Expect.approxEquals(3.0, m.z, 0.001);
+  Expect.approxEquals(4.0, m.w, 0.001);
 }
 
 testSelect() {