diff --git a/backends/cadence/fused_quant/op_linear.cpp b/backends/cadence/fused_quant/op_linear.cpp
new file mode 100644
index 00000000000..be846fd5ede
--- /dev/null
+++ b/backends/cadence/fused_quant/op_linear.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// NOTE(review): include targets were lost in transit; reconstructed from the
+// Buck deps (:quant_utils, kernel_includes) — confirm against the repo.
+#include <executorch/backends/cadence/fused_quant/op_linear.h>
+#include <executorch/backends/cadence/fused_quant/quant_utils.h>
+#include <vector>
+
+namespace cadence {
+namespace fused_quant {
+namespace native {
+
+using executorch::aten::optional;
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using executorch::runtime::KernelRuntimeContext;
+
+namespace {
+
+// Plain float linear: out[r, o] = sum_i inp[r, i] * weight[o, i] (+ bias[o]).
+// `bias` may be null, in which case the accumulator starts at 0.
+void linear_kernel(
+    const float* inp,
+    const float* weight,
+    const float* bias,
+    float* out,
+    int64_t num_rows,
+    int64_t in_features,
+    int64_t out_features) {
+  for (int64_t r = 0; r < num_rows; ++r) {
+    for (int64_t o = 0; o < out_features; ++o) {
+      float sum = bias ? bias[o] : 0.0f;
+      for (int64_t i = 0; i < in_features; ++i) {
+        sum += inp[r * in_features + i] * weight[o * in_features + i];
+      }
+      out[r * out_features + o] = sum;
+    }
+  }
+}
+
+} // namespace
+
+// Fused-quant linear: each of inp/weight/bias/out is independently either
+// float (scale absent) or quantized (scale present; dequantized/quantized
+// around the float kernel using the per-operand qparams).
+Tensor& linear_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& inp,
+    const Tensor& weight,
+    const optional<Tensor>& bias,
+    // inp qparams
+    const optional<Tensor>& inp_scale,
+    const optional<Tensor>& inp_zero_point,
+    ScalarType inp_dtype,
+    int64_t inp_quant_min,
+    int64_t inp_quant_max,
+    optional<int64_t> inp_axis,
+    // weight qparams
+    const optional<Tensor>& weight_scale,
+    const optional<Tensor>& weight_zero_point,
+    ScalarType weight_dtype,
+    int64_t weight_quant_min,
+    int64_t weight_quant_max,
+    optional<int64_t> weight_axis,
+    // bias qparams
+    const optional<Tensor>& bias_scale,
+    const optional<Tensor>& bias_zero_point,
+    ScalarType bias_dtype,
+    int64_t bias_quant_min,
+    int64_t bias_quant_max,
+    optional<int64_t> bias_axis,
+    // out qparams
+    const optional<Tensor>& out_scale,
+    const optional<Tensor>& out_zero_point,
+    ScalarType out_dtype,
+    int64_t out_quant_min,
+    int64_t out_quant_max,
+    optional<int64_t> out_axis,
+    Tensor& out) {
+  int64_t in_features = inp.size(inp.dim() - 1);
+  int64_t out_features = weight.size(0);
+  int64_t num_rows = inp.numel() / in_features;
+  int64_t inp_numel = inp.numel();
+  int64_t weight_numel = weight.numel();
+  int64_t out_numel = num_rows * out_features;
+
+  // Presence of a scale tensor is the "is quantized" signal per operand.
+  bool inp_quantized = inp_scale.has_value();
+  bool weight_quantized = weight_scale.has_value();
+  bool bias_quantized = bias_scale.has_value();
+  bool out_quantized = out_scale.has_value();
+
+  // Dequantize inp
+  std::vector<float> inp_buf;
+  const float* const inp_float = [&]() -> const float* {
+    if (!inp_quantized) {
+      return inp.const_data_ptr<float>();
+    }
+    inp_buf.resize(inp_numel);
+    QParams qp = extract_qparams(
+        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp);
+    FUSED_QUANT_DTYPE_SWITCH(
+        inp.scalar_type(),
+        scalar_t,
+        dequantize_buffer(
+            inp.const_data_ptr<scalar_t>(), inp_buf.data(), inp_numel, qp);)
+    return inp_buf.data();
+  }();
+
+  // Dequantize weight
+  std::vector<float> weight_buf;
+  const float* const weight_float = [&]() -> const float* {
+    if (!weight_quantized) {
+      return weight.const_data_ptr<float>();
+    }
+    weight_buf.resize(weight_numel);
+    QParams qp = extract_qparams(
+        weight_scale,
+        weight_zero_point,
+        weight_quant_min,
+        weight_quant_max,
+        weight_axis,
+        weight);
+    FUSED_QUANT_DTYPE_SWITCH(
+        weight.scalar_type(),
+        scalar_t,
+        dequantize_buffer(
+            weight.const_data_ptr<scalar_t>(),
+            weight_buf.data(),
+            weight_numel,
+            qp);)
+    return weight_buf.data();
+  }();
+
+  // Dequantize bias if present and quantized
+  std::vector<float> bias_buf;
+  const float* const bias_float = [&]() -> const float* {
+    if (!bias.has_value()) {
+      return nullptr;
+    }
+    const Tensor& b = bias.value();
+    if (!bias_quantized) {
+      return b.const_data_ptr<float>();
+    }
+    int64_t bias_numel = b.numel();
+    bias_buf.resize(bias_numel);
+    QParams qp = extract_qparams(
+        bias_scale,
+        bias_zero_point,
+        bias_quant_min,
+        bias_quant_max,
+        bias_axis,
+        b);
+    FUSED_QUANT_DTYPE_SWITCH(
+        b.scalar_type(),
+        scalar_t,
+        dequantize_buffer(
+            b.const_data_ptr<scalar_t>(), bias_buf.data(), bias_numel, qp);)
+    return bias_buf.data();
+  }();
+
+  // Linear + optional quantize
+  if (out_quantized) {
+    std::vector<float> result_float(out_numel);
+    linear_kernel(
+        inp_float,
+        weight_float,
+        bias_float,
+        result_float.data(),
+        num_rows,
+        in_features,
+        out_features);
+    QParams qp = extract_qparams(
+        out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out);
+    FUSED_QUANT_DTYPE_SWITCH(
+        out.scalar_type(),
+        scalar_t,
+        quantize_buffer(
+            result_float.data(),
+            out.mutable_data_ptr<scalar_t>(),
+            out_numel,
+            qp);)
+  } else {
+    linear_kernel(
+        inp_float,
+        weight_float,
+        bias_float,
+        out.mutable_data_ptr<float>(),
+        num_rows,
+        in_features,
+        out_features);
+  }
+
+  return out;
+}
+
+} // namespace native
+} // namespace fused_quant
+} // namespace cadence
diff --git a/backends/cadence/fused_quant/op_linear.h b/backends/cadence/fused_quant/op_linear.h
new file mode 100644
index 00000000000..99d20ba5bbc
--- /dev/null
+++ b/backends/cadence/fused_quant/op_linear.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+// NOTE(review): include targets were lost in transit; reconstructed from
+// usage (Tensor/optional/ScalarType + KernelRuntimeContext) — confirm.
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/kernel/kernel_runtime_context.h>
+
+namespace cadence {
+namespace fused_quant {
+namespace native {
+
+executorch::aten::Tensor& linear_out(
+    executorch::runtime::KernelRuntimeContext& ctx,
+    const executorch::aten::Tensor& inp,
+    const executorch::aten::Tensor& weight,
+    const executorch::aten::optional<executorch::aten::Tensor>& bias,
+    // inp qparams
+    const executorch::aten::optional<executorch::aten::Tensor>& inp_scale,
+    const executorch::aten::optional<executorch::aten::Tensor>& inp_zero_point,
+    executorch::aten::ScalarType inp_dtype,
+    int64_t inp_quant_min,
+    int64_t inp_quant_max,
+    executorch::aten::optional<int64_t> inp_axis,
+    // weight qparams
+    const executorch::aten::optional<executorch::aten::Tensor>& weight_scale,
+    const executorch::aten::optional<executorch::aten::Tensor>&
+        weight_zero_point,
+    executorch::aten::ScalarType weight_dtype,
+    int64_t weight_quant_min,
+    int64_t weight_quant_max,
+    executorch::aten::optional<int64_t> weight_axis,
+    // bias qparams
+    const executorch::aten::optional<executorch::aten::Tensor>& bias_scale,
+    const executorch::aten::optional<executorch::aten::Tensor>& bias_zero_point,
+    executorch::aten::ScalarType bias_dtype,
+    int64_t bias_quant_min,
+    int64_t bias_quant_max,
+    executorch::aten::optional<int64_t> bias_axis,
+    // out qparams
+    const executorch::aten::optional<executorch::aten::Tensor>& out_scale,
+    const executorch::aten::optional<executorch::aten::Tensor>& out_zero_point,
+    executorch::aten::ScalarType out_dtype,
+    int64_t out_quant_min,
+    int64_t out_quant_max,
+    executorch::aten::optional<int64_t> out_axis,
+    executorch::aten::Tensor& out);
+
+} // namespace native
+} // namespace fused_quant
+} // namespace cadence
diff --git a/backends/cadence/fused_quant/targets.bzl b/backends/cadence/fused_quant/targets.bzl
index f98a357ae90..2b0a82e623f 100644
--- a/backends/cadence/fused_quant/targets.bzl
+++ b/backends/cadence/fused_quant/targets.bzl
@@ -70,3 +70,15 @@ def define_common_targets():
         ],
         visibility = ["PUBLIC"],
     )
+
+    runtime.cxx_library(
+        name = "op_linear",
+        srcs = ["op_linear.cpp"],
+        exported_headers = ["op_linear.h"],
+        platforms = CXX,
+        deps = [
+            ":quant_utils",
+            "//executorch/runtime/kernel:kernel_includes",
+        ],
+        visibility = ["PUBLIC"],
+    )
diff --git a/backends/cadence/fused_quant/tests/BUCK b/backends/cadence/fused_quant/tests/BUCK
index 90b3af0aa45..6f085e26202 100644
--- a/backends/cadence/fused_quant/tests/BUCK
+++ b/backends/cadence/fused_quant/tests/BUCK
@@ -57,3 +57,14 @@ runtime.cxx_test(
         "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
     ],
 )
+
+runtime.cxx_test(
+    name = "test_op_linear",
+    srcs = ["test_op_linear.cpp"],
+    platforms = CXX,
+    deps = [
+        "//executorch/backends/cadence/fused_quant:op_linear",
+        "//executorch/kernels/test:gtest_utils",
+        "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
+    ],
+)
diff --git a/backends/cadence/fused_quant/tests/test_op_linear.cpp b/backends/cadence/fused_quant/tests/test_op_linear.cpp
new file mode 100644
index 00000000000..ecba8cf7a3e
--- /dev/null
+++ b/backends/cadence/fused_quant/tests/test_op_linear.cpp
@@ -0,0 +1,459 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <gtest/gtest.h>
+
+// NOTE(review): include targets were lost in transit; reconstructed from the
+// Buck deps (gtest_utils, testing_util:tensor_util) and from the use of
+// OperatorTest/context_/TensorFactory/EXPECT_TENSOR_EQ below — confirm.
+#include <executorch/backends/cadence/fused_quant/op_linear.h>
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+
+using executorch::aten::optional;
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using executorch::runtime::testing::TensorFactory;
+
+namespace {
+
+optional<Tensor> none_tensor() {
+  return optional<Tensor>();
+}
+
+optional<int64_t> none_axis() {
+  return optional<int64_t>();
+}
+
+} // namespace
+
+class FusedQuantLinearTest : public OperatorTest {};
+
+// All quantized, no bias: int8 inp + int8 weight -> int8 out
+TEST_F(FusedQuantLinearTest, AllQuantizedNoBias) {
+  TensorFactory<ScalarType::Char> tf_int8;
+  TensorFactory<ScalarType::Float> tf_float;
+  TensorFactory<ScalarType::Long> tf_long;
+
+  // inp [1,2]: int8 {2,4}, scale=0.5, zp=0 -> float {1.0, 2.0}
+  Tensor inp = tf_int8.make({1, 2}, {2, 4});
+  Tensor inp_scale = tf_float.make({1}, {0.5});
+  Tensor inp_zp = tf_long.make({1}, {0});
+
+  // weight [2,2]: int8 {2,0,0,2}, scale=0.5, zp=0
+  // -> float {{1,0},{0,1}} (identity)
+  Tensor weight = tf_int8.make({2, 2}, {2, 0, 0, 2});
+  Tensor weight_scale = tf_float.make({1}, {0.5});
+  Tensor weight_zp = tf_long.make({1}, {0});
+
+  // out qparams: scale=0.5, zp=0
+  Tensor out_scale = tf_float.make({1}, {0.5});
+  Tensor out_zp = tf_long.make({1}, {0});
+
+  Tensor out = tf_int8.zeros({1, 2});
+
+  // linear: {1,2} @ identity = {1,2}
+  // requant (scale=0.5, zp=0): {round(1/0.5), round(2/0.5)} = {2, 4}
+  cadence::fused_quant::native::linear_out(
+      context_,
+      inp,
+      weight,
+      none_tensor(), // no bias
+      // inp qparams
+      optional<Tensor>(inp_scale),
+      optional<Tensor>(inp_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      none_axis(),
+      // weight qparams
+      optional<Tensor>(weight_scale),
+      optional<Tensor>(weight_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      none_axis(),
+      // bias qparams (unused, no bias)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // out qparams
+      optional<Tensor>(out_scale),
+      optional<Tensor>(out_zp),
+      ScalarType::Char,
+      -128,
+      127,
+      none_axis(),
+      out);
+
+  EXPECT_TENSOR_EQ(out, tf_int8.make({1, 2}, {2, 4}));
+}
+
+// All quantized with bias: int8 inp + int8 weight + int8 bias -> int8 out
+TEST_F(FusedQuantLinearTest, AllQuantizedWithBias) {
+  TensorFactory<ScalarType::Char> tf_int8;
+  TensorFactory<ScalarType::Float> tf_float;
+  TensorFactory<ScalarType::Long> tf_long;
+
+  // inp [1,2]: int8 {2,4}, scale=0.5, zp=0 -> float {1.0, 2.0}
+  Tensor inp = tf_int8.make({1, 2}, {2, 4});
+  Tensor inp_scale = tf_float.make({1}, {0.5});
+  Tensor inp_zp = tf_long.make({1}, {0});
+
+  // weight [2,2]: int8 {2,0,0,2}, scale=0.5, zp=0
+  // -> float {{1,0},{0,1}} (identity)
+  Tensor weight = tf_int8.make({2, 2}, {2, 0, 0, 2});
+  Tensor weight_scale = tf_float.make({1}, {0.5});
+  Tensor weight_zp = tf_long.make({1}, {0});
+
+  // bias [2]: int8 {2,2}, scale=0.5, zp=0 -> float {1.0, 1.0}
+  Tensor bias = tf_int8.make({2}, {2, 2});
+  Tensor bias_scale = tf_float.make({1}, {0.5});
+  Tensor bias_zp = tf_long.make({1}, {0});
+
+  // out qparams: scale=0.5, zp=0
+  Tensor out_scale = tf_float.make({1}, {0.5});
+  Tensor out_zp = tf_long.make({1}, {0});
+
+  Tensor out = tf_int8.zeros({1, 2});
+
+  // linear: {1,2} @ identity + {1,1} = {2, 3}
+  // requant (scale=0.5, zp=0): {round(2/0.5), round(3/0.5)} = {4, 6}
+  cadence::fused_quant::native::linear_out(
+      context_,
+      inp,
+      weight,
+      optional<Tensor>(bias),
+      // inp qparams
+      optional<Tensor>(inp_scale),
+      optional<Tensor>(inp_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      none_axis(),
+      // weight qparams
+      optional<Tensor>(weight_scale),
+      optional<Tensor>(weight_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      none_axis(),
+      // bias qparams
+      optional<Tensor>(bias_scale),
+      optional<Tensor>(bias_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      none_axis(),
+      // out qparams
+      optional<Tensor>(out_scale),
+      optional<Tensor>(out_zp),
+      ScalarType::Char,
+      -128,
+      127,
+      none_axis(),
+      out);
+
+  EXPECT_TENSOR_EQ(out, tf_int8.make({1, 2}, {4, 6}));
+}
+
+// Float inputs -> int8 output
+TEST_F(FusedQuantLinearTest, FloatInputsQuantizedOutput) {
+  TensorFactory<ScalarType::Char> tf_int8;
+  TensorFactory<ScalarType::Float> tf_float;
+  TensorFactory<ScalarType::Long> tf_long;
+
+  // inp [1,2]: float {1.0, 2.0}
+  Tensor inp = tf_float.make({1, 2}, {1.0, 2.0});
+
+  // weight [2,2]: float identity {{1,0},{0,1}}
+  Tensor weight = tf_float.make({2, 2}, {1.0, 0.0, 0.0, 1.0});
+
+  // out qparams: scale=0.5, zp=0
+  Tensor out_scale = tf_float.make({1}, {0.5});
+  Tensor out_zp = tf_long.make({1}, {0});
+
+  Tensor out = tf_int8.zeros({1, 2});
+
+  // linear: {1,2} @ identity = {1, 2}
+  // requant (scale=0.5, zp=0): {2, 4}
+  cadence::fused_quant::native::linear_out(
+      context_,
+      inp,
+      weight,
+      none_tensor(), // no bias
+      // inp qparams (not quantized)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // weight qparams (not quantized)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // bias qparams (no bias)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // out qparams
+      optional<Tensor>(out_scale),
+      optional<Tensor>(out_zp),
+      ScalarType::Char,
+      -128,
+      127,
+      none_axis(),
+      out);
+
+  EXPECT_TENSOR_EQ(out, tf_int8.make({1, 2}, {2, 4}));
+}
+
+// int8 inputs -> float output
+TEST_F(FusedQuantLinearTest, QuantizedInputsFloatOutput) {
+  TensorFactory<ScalarType::Char> tf_int8;
+  TensorFactory<ScalarType::Float> tf_float;
+  TensorFactory<ScalarType::Long> tf_long;
+
+  // inp [1,2]: int8 {2,4}, scale=0.5, zp=0 -> float {1.0, 2.0}
+  Tensor inp = tf_int8.make({1, 2}, {2, 4});
+  Tensor inp_scale = tf_float.make({1}, {0.5});
+  Tensor inp_zp = tf_long.make({1}, {0});
+
+  // weight [2,2]: int8 {2,0,0,2}, scale=0.5, zp=0 -> identity
+  Tensor weight = tf_int8.make({2, 2}, {2, 0, 0, 2});
+  Tensor weight_scale = tf_float.make({1}, {0.5});
+  Tensor weight_zp = tf_long.make({1}, {0});
+
+  Tensor out = tf_float.zeros({1, 2});
+
+  // linear: {1,2} @ identity = {1.0, 2.0}
+  cadence::fused_quant::native::linear_out(
+      context_,
+      inp,
+      weight,
+      none_tensor(), // no bias
+      // inp qparams
+      optional<Tensor>(inp_scale),
+      optional<Tensor>(inp_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      none_axis(),
+      // weight qparams
+      optional<Tensor>(weight_scale),
+      optional<Tensor>(weight_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      none_axis(),
+      // bias qparams (no bias)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // out qparams (float, not quantized)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      out);
+
+  EXPECT_TENSOR_EQ(out, tf_float.make({1, 2}, {1.0, 2.0}));
+}
+
+// Per-channel quantized weights (axis=0)
+TEST_F(FusedQuantLinearTest, PerChannelWeights) {
+  TensorFactory<ScalarType::Char> tf_int8;
+  TensorFactory<ScalarType::Float> tf_float;
+  TensorFactory<ScalarType::Long> tf_long;
+
+  // inp [1,2]: float {1.0, 2.0}
+  Tensor inp = tf_float.make({1, 2}, {1.0, 2.0});
+
+  // weight [2,2]: int8 {2,4,3,6}, per-channel axis=0
+  // ch0 scale=0.5: {(2-0)*0.5, (4-0)*0.5} = {1.0, 2.0}
+  // ch1 scale=1.0: {(3-0)*1.0, (6-0)*1.0} = {3.0, 6.0}
+  Tensor weight = tf_int8.make({2, 2}, {2, 4, 3, 6});
+  Tensor weight_scale = tf_float.make({2}, {0.5, 1.0});
+  Tensor weight_zp = tf_long.make({2}, {0, 0});
+
+  // out qparams: scale=0.5, zp=0
+  Tensor out_scale = tf_float.make({1}, {0.5});
+  Tensor out_zp = tf_long.make({1}, {0});
+
+  Tensor out = tf_int8.zeros({1, 2});
+
+  // linear: out[0] = 1*1 + 2*2 = 5, out[1] = 1*3 + 2*6 = 15
+  // requant (scale=0.5, zp=0): {round(5/0.5), round(15/0.5)} = {10, 30}
+  cadence::fused_quant::native::linear_out(
+      context_,
+      inp,
+      weight,
+      none_tensor(), // no bias
+      // inp qparams (not quantized)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // weight qparams (per-channel, axis=0)
+      optional<Tensor>(weight_scale),
+      optional<Tensor>(weight_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      optional<int64_t>(0),
+      // bias qparams (no bias)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // out qparams
+      optional<Tensor>(out_scale),
+      optional<Tensor>(out_zp),
+      ScalarType::Char,
+      -128,
+      127,
+      none_axis(),
+      out);
+
+  EXPECT_TENSOR_EQ(out, tf_int8.make({1, 2}, {10, 30}));
+}
+
+// Batched input: inp [2,2]
+TEST_F(FusedQuantLinearTest, BatchedInput) {
+  TensorFactory<ScalarType::Float> tf_float;
+
+  // inp [2,2]: float, 2 batch rows
+  Tensor inp = tf_float.make({2, 2}, {1.0, 2.0, 3.0, 4.0});
+
+  // weight [2,2]: float identity
+  Tensor weight = tf_float.make({2, 2}, {1.0, 0.0, 0.0, 1.0});
+
+  Tensor out = tf_float.zeros({2, 2});
+
+  // linear row0: {1,2} @ identity = {1, 2}
+  // linear row1: {3,4} @ identity = {3, 4}
+  cadence::fused_quant::native::linear_out(
+      context_,
+      inp,
+      weight,
+      none_tensor(), // no bias
+      // inp qparams (not quantized)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // weight qparams (not quantized)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // bias qparams (no bias)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // out qparams (not quantized)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      out);
+
+  EXPECT_TENSOR_EQ(out, tf_float.make({2, 2}, {1.0, 2.0, 3.0, 4.0}));
+}
+
+// Non-zero zero points
+TEST_F(FusedQuantLinearTest, NonZeroZeroPoint) {
+  TensorFactory<ScalarType::Char> tf_int8;
+  TensorFactory<ScalarType::Float> tf_float;
+  TensorFactory<ScalarType::Long> tf_long;
+
+  // inp [1,2]: int8 {6,8}, scale=0.25, zp=2
+  // dequant: {(6-2)*0.25, (8-2)*0.25} = {1.0, 1.5}
+  Tensor inp = tf_int8.make({1, 2}, {6, 8});
+  Tensor inp_scale = tf_float.make({1}, {0.25});
+  Tensor inp_zp = tf_long.make({1}, {2});
+
+  // weight [2,2]: int8 {6,2,2,6}, scale=0.25, zp=2
+  // dequant: {(6-2)*0.25, (2-2)*0.25, (2-2)*0.25, (6-2)*0.25}
+  // = {1.0, 0.0, 0.0, 1.0} (identity)
+  Tensor weight = tf_int8.make({2, 2}, {6, 2, 2, 6});
+  Tensor weight_scale = tf_float.make({1}, {0.25});
+  Tensor weight_zp = tf_long.make({1}, {2});
+
+  // out: scale=0.5, zp=1
+  Tensor out_scale = tf_float.make({1}, {0.5});
+  Tensor out_zp = tf_long.make({1}, {1});
+
+  Tensor out = tf_int8.zeros({1, 2});
+
+  // linear: {1.0, 1.5} @ identity = {1.0, 1.5}
+  // requant (scale=0.5, zp=1): {round(1.0/0.5)+1, round(1.5/0.5)+1} = {3, 4}
+  cadence::fused_quant::native::linear_out(
+      context_,
+      inp,
+      weight,
+      none_tensor(), // no bias
+      // inp qparams
+      optional<Tensor>(inp_scale),
+      optional<Tensor>(inp_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      none_axis(),
+      // weight qparams
+      optional<Tensor>(weight_scale),
+      optional<Tensor>(weight_zp),
+      ScalarType::Float,
+      -128,
+      127,
+      none_axis(),
+      // bias qparams (no bias)
+      none_tensor(),
+      none_tensor(),
+      ScalarType::Float,
+      0,
+      0,
+      none_axis(),
+      // out qparams
+      optional<Tensor>(out_scale),
+      optional<Tensor>(out_zp),
+      ScalarType::Char,
+      -128,
+      127,
+      none_axis(),
+      out);
+
+  EXPECT_TENSOR_EQ(out, tf_int8.make({1, 2}, {3, 4}));
+}