🚘 Auto opt cli (#1343)
trajepl committed Sep 20, 2024
1 parent 180dffb commit 3fff5d4
Showing 19 changed files with 466 additions and 57 deletions.
18 changes: 10 additions & 8 deletions olive/auto_optimizer/__init__.py
@@ -44,6 +44,8 @@ class AutoOptimizerConfig(ConfigBase):
# if fine_tune is True, we will not suggest training-related passes such as QLoRA
# fine_tune: bool = False

excluded_passes: Optional[List[str]] = None

@validator("opt_level", pre=True)
def check_opt_level(cls, v):
if v != 0:
@@ -76,10 +78,7 @@ def _initialize(self):
# if user can tolerate accuracy drop, we can enable more optimization
default_precisions = [Precision.FP32]
if self.is_accuracy_drop_tolerance:
# ignore int4 for now as it is not supported very well in onnxruntime
# enable it only when user explicitly set it
# default_precisions = [Precision.FP32, Precision.FP16, Precision.INT8, Precision.INT4]
default_precisions = [Precision.FP32, Precision.FP16, Precision.INT8]
default_precisions = [Precision.FP32, Precision.FP16, Precision.INT8, Precision.INT4]
self.auto_optimizer_config.precisions = self.auto_optimizer_config.precisions or default_precisions

def suggest(self):
@@ -92,11 +91,11 @@ def suggest(self):
return self._regulate(self._suggest_pass_flows())

def _suggest_pass_flows(self):
pass_flows_by_precision = []
pass_flows = []
if self.auto_optimizer_config.opt_level == 0:
pass_flows_by_precision = self._suggest_pass_flows_from_template()
pass_flows = self._suggest_pass_flows_from_template()

return pass_flows_by_precision
return pass_flows

def _suggest_pass_flows_from_template(self):
from olive.auto_optimizer.template_mapping import get_pass_flows_by_accelerator_ep_precision
@@ -110,6 +109,7 @@ def _suggest_pass_flows_from_template(self):
self.accelerator_spec.accelerator_type.value,
self.accelerator_spec.execution_provider,
precision,
self.auto_optimizer_config.excluded_passes,
)
return pass_flows_by_precision

@@ -120,4 +120,6 @@ def _regulate(self, pass_flows_by_precision):
pass_config, pass_flows = self.regulate_pass_flows_dict(pass_flows_by_precision)

# step2: fill the data_config for the passes that need data_config
return self.regulate_data_config(pass_config, pass_flows)
pass_config, pass_flows = self.regulate_data_config(pass_config, pass_flows)

return pass_config, pass_flows
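For reference, a minimal sketch (not part of this diff) of how the new excluded_passes field might be used; the import path and field names follow the code above, other fields keep their defaults, and the excluded pass names are purely illustrative:

from olive.auto_optimizer import AutoOptimizerConfig

# opt_level 0 is the only value the validator above accepts
config = AutoOptimizerConfig(
    opt_level=0,
    excluded_passes=["OnnxMatMul4Quantizer", "IncQuantization"],  # illustrative exclusions
)
# Passes listed here are filtered out of every level of the opt_level pass tree
# before pass flows are generated (see template_mapping.py below).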
4 changes: 2 additions & 2 deletions olive/auto_optimizer/config_template/opt_level_passes.yaml
@@ -14,7 +14,7 @@
# OnnxConversion -> OrtTransformersOptimization -> IncQuantization -> OrtPerfTuning
# and etc.

- [OnnxConversion]
- [OnnxConversion, ModelBuilder]
- [OrtTransformersOptimization]
- [OrtMixedPrecision, OnnxQuantization, IncQuantization, VitisAIQuantization, OnnxMatMul4Quantizer]
- [OnnxQuantization, IncQuantization, VitisAIQuantization, OnnxMatMul4Quantizer, OrtMixedPrecision]
- [OrtPerfTuning]
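As a rough illustration of how these level lists compose into candidate flows (assuming each flow picks at most one pass per level; the real generator in template_mapping.py also skips levels and filters every pass by EP/precision/accelerator capability, which is ignored here):

from itertools import product

levels = [
    ["OnnxConversion", "ModelBuilder"],
    ["OrtTransformersOptimization"],
    ["OnnxQuantization", "IncQuantization", "VitisAIQuantization", "OnnxMatMul4Quantizer", "OrtMixedPrecision"],
    ["OrtPerfTuning"],
]
# full-depth candidates only; level-skipping and capability filtering are left out
full_depth_flows = [list(flow) for flow in product(*levels)]
print(len(full_depth_flows))  # 2 * 1 * 5 * 1 = 10 candidate flows before filtering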
8 changes: 8 additions & 0 deletions olive/auto_optimizer/config_template/pass_capability.yaml
@@ -24,6 +24,14 @@ OnnxConversion:
EP: null
precision: null
accelerator: null
ModelBuilder:
EP:
- CPU
- CUDA
precision: null
accelerator:
- cpu
- gpu
OrtTransformersOptimization:
EP: null
precision: null
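A hedged sketch of how a capability entry such as the ModelBuilder block above can be matched against a target. The real check is _if_match_pass_capability in template_mapping.py; the rules assumed here (a null field means "no restriction", EP names compared case-insensitively) are inferred from the YAML shape, not taken from this diff:

def matches(cap, accelerator, ep, precision):
    # a null (None) field in the YAML means the pass has no restriction on that axis
    if cap.get("EP") is not None and ep not in (e.lower() for e in cap["EP"]):
        return False
    if cap.get("accelerator") is not None and accelerator not in cap["accelerator"]:
        return False
    if cap.get("precision") is not None and precision not in cap["precision"]:
        return False
    return True

model_builder_cap = {"EP": ["CPU", "CUDA"], "precision": None, "accelerator": ["cpu", "gpu"]}
assert matches(model_builder_cap, "gpu", "cuda", "fp16")
assert not matches(model_builder_cap, "gpu", "tensorrt", "fp16")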
81 changes: 56 additions & 25 deletions olive/auto_optimizer/regulate_mixins.py
@@ -13,26 +13,26 @@ def regulate_pass_flows_dict(self, pass_flows_dict):
# remove unneeded passes according to the Olive model type; for example, if the input is already an ONNX model
# the conversion passes will be removed
if self.input_model_config.type.lower().endswith("onnxmodel"):
to_remove_passes = ["OnnxConversion", "ModelBuilder"]
for pfs in pass_flows_dict.values():
for pf in pfs:
pf.remove("OnnxConversion")
for p in to_remove_passes:
if p in pf:
pf.remove(p)

# special passes: OrtTransformerOptimization and OrtPerfTuning can be used for both fp16 and fp32
# we need assign different pass name for them
# special passes: ModelBuilder, OrtTransformerOptimization and OrtPerfTuning can
# be used for both fp16 and fp32; we need to assign different pass names for them
# for example, for gpu_cuda_fp16 we need to rename OrtTransformerOptimization to OrtTransformerOptimization_cuda_fp16
pass_flows_by_fp16 = pass_flows_dict.get("fp16", [])
pass_config, pass_flows_16 = self._regulate_fp16(None, pass_flows_by_fp16)
pass_config, pass_flows_dict = self._regulate_precision(None, pass_flows_dict)

# flatten pass_flows_dict to pass_flows and generate the default pass_configs
pass_flows = []
unique_pass_flows = set()
if pass_flows_16:
pass_flows_dict["fp16"] = pass_flows_16
for pfs in pass_flows_dict.values():
for pf in pfs:
if tuple(pf) not in unique_pass_flows:
unique_pass_flows.add(tuple(pf))
pass_flows.append(pf)
unique_pass_flows.add(tuple(pf))
for p in pf:
if p not in pass_config:
pass_config.update({p: {"type": p, "config": {}}})
@@ -42,15 +42,34 @@ def regulate_pass_flows_dict(self, pass_flows_dict):
pass_config[pass_name]["disable_search"] = True
return pass_config, pass_flows
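To make the flatten/dedup step above concrete, here is an illustrative run with a hand-written pass_flows_dict; the dict shape mirrors what _suggest_pass_flows_from_template returns (keyed by precision), and the flows themselves are made up:

pass_flows_dict = {
    "fp32": [["OnnxConversion", "OrtPerfTuning"]],
    "fp16": [["OnnxConversion", "OrtPerfTuning"]],  # same flow suggested for another precision
}

pass_flows, unique, pass_config = [], set(), {}
for pfs in pass_flows_dict.values():
    for pf in pfs:
        if tuple(pf) not in unique:
            unique.add(tuple(pf))
            pass_flows.append(pf)
        for p in pf:
            pass_config.setdefault(p, {"type": p, "config": {}})

print(pass_flows)   # [['OnnxConversion', 'OrtPerfTuning']] -- the duplicate flow collapses
print(pass_config)  # a default {"type": ..., "config": {}} entry per unique pass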

def _regulate_fp16(self, pass_config, pass_flows):
def _fill_precision_for_model_builder(self, pass_config, pass_flows):
for precision, pfs in pass_flows.items():
for pass_flow in pfs:
for i, p in enumerate(pass_flow):
if p == "ModelBuilder":
pass_flow[i] = f"ModelBuilder_{precision}"
pass_config.update(
{
pass_flow[i]: {
"type": "ModelBuilder",
"config": {
"precision": precision,
},
}
}
)
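A worked example of the renaming this helper performs; the input values are made up, while the resulting names and config entries follow the code above:

pass_flows = {
    "fp16": [["ModelBuilder", "OrtPerfTuning"]],
    "int4": [["ModelBuilder"]],
}
pass_config = {}
for precision, pfs in pass_flows.items():
    for pass_flow in pfs:
        for i, p in enumerate(pass_flow):
            if p == "ModelBuilder":
                pass_flow[i] = f"ModelBuilder_{precision}"
                pass_config[pass_flow[i]] = {"type": "ModelBuilder", "config": {"precision": precision}}

# pass_flows  -> {"fp16": [["ModelBuilder_fp16", "OrtPerfTuning"]], "int4": [["ModelBuilder_int4"]]}
# pass_config -> one entry per suffixed name, all sharing type "ModelBuilder"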

def _regulate_precision(self, pass_config, pass_flows):
pass_config = pass_config or {}
# if it is ModelBuilder, we need to add a suffix for each precision to distinguish them
self._fill_precision_for_model_builder(pass_config, pass_flows)
is_gpu = self.accelerator_spec.accelerator_type == Device.GPU and self.accelerator_spec.execution_provider in [
"CUDAExecutionProvider",
"DmlExecutionProvider",
"TensorrtExecutionProvider",
]
if not is_gpu or not self.is_accuracy_drop_tolerance:
return {}, []
return pass_config, pass_flows

is_cuda_ep = self.accelerator_spec.execution_provider != "TensorrtExecutionProvider"
is_trt_ep = self.accelerator_spec.execution_provider == "TensorrtExecutionProvider"
@@ -66,8 +85,8 @@ def _regulate_fp16(self, pass_config, pass_flows):
perf_tuning = "OrtPerfTuning"
trans_opt_fp16 = "OrtTransformerOptimization_cuda_fp16"
perf_tuning_fp16 = "OrtPerfTuning_trt_fp16"

for i, pf in enumerate(pass_flows):
pass_flows_by_fp16 = pass_flows.get("fp16", [])
for i, pf in enumerate(pass_flows_by_fp16):
new_pf = deepcopy(pf)
if "OrtMixedPrecision" not in pf:
for j, p in enumerate(pf):
@@ -98,24 +117,36 @@ def regulate_pass_flows_dict(self, pass_flows_dict):
}
}
)

pass_flows[i] = new_pf

pass_flows_by_fp16[i] = new_pf
if pass_flows_by_fp16:
pass_flows["fp16"] = pass_flows_by_fp16
return pass_config, pass_flows

def regulate_data_config(self, pass_config, pass_flows):
if not self.data_configs or not self.auto_optimizer_config or self.auto_optimizer_config.disable_auto_optimizer:
if not self.auto_optimizer_config or self.auto_optimizer_config.disable_auto_optimizer:
return pass_config, pass_flows

if len(self.data_configs) != 1:
raise ValueError("AutoOptimizer expects exactly one data config.")

passes_require_data_config = ["OnnxQuantization", "OrtPerfTuning"]
for p in passes_require_data_config:
# TODO(anyone): support multi data_config for different passes, pass_flows
p_names = self._find_pass_name_in_pass_flow(p, pass_flows)
for pn in p_names:
pass_config[pn]["config"]["data_config"] = self.data_configs[0]
passes_require_data_config = ["OrtPerfTuning", "IncQuantization", "OnnxQuantization"]
if not self.data_configs:
# remove the passes which require data_config
for pass_flow in pass_flows:
for p in passes_require_data_config:
p_names = self._find_pass_name_in_pass_flow(p, [pass_flow])
for pn in p_names:
pass_flow.remove(pn)
pass_config.pop(pn, None)
for p in pass_flow:
if p.lower().startswith("onnxquantization"):
pass_config[p]["config"]["quant_mode"] = "dynamic"
else:
if len(self.data_configs) != 1:
raise ValueError("AutoOptimizer expects exactly one data config.")

for p in passes_require_data_config:
# TODO(anyone): support multi data_config for different passes, pass_flows
p_names = self._find_pass_name_in_pass_flow(p, pass_flows)
for pn in p_names:
pass_config[pn]["config"]["data_config"] = self.data_configs[0]

return pass_config, pass_flows

58 changes: 50 additions & 8 deletions olive/auto_optimizer/template_mapping.py
@@ -30,47 +30,89 @@ def get_available_passes_by_opt_level(opt_level):
return opt_level_passes[str(opt_level)]


def get_pass_flows_by_accelerator_ep_precision(opt_level, accelerator, ep, precision):
def remove_incompatible_passes(pass_flows):
# for suggested pass_flows, some passes in the same pass_flow may be incompatible with each other
# e.g. ModelBuilder(int4) -> MatMul int4 quantization, where the MatMul int4 quantization can be ignored
# this kind of constraint should be defined manually by Olive

# rule 1: if the model is produced by ModelBuilder, we should remove the following quantization/optimization passes
incompatible_passes_with_model_builder = [
"OnnxQuantization",
"IncQuantization",
"VitisAIQuantization",
"OnnxMatMul4Quantizer",
"OrtTransformersOptimization",
"OrtMixedPrecision",
]
for pass_flow in pass_flows:
if "ModelBuilder" in pass_flow:
for p in incompatible_passes_with_model_builder:
if p in pass_flow:
pass_flow.remove(p)

# remove duplicated pass_flows
pass_flows_tuple = {tuple(pf) for pf in pass_flows}
return [list(pf) for pf in pass_flows_tuple]
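Worked example of the rule above, assuming the function is importable from this module; the ModelBuilder flow keeps only ModelBuilder and OrtPerfTuning, and the order of the returned list is not guaranteed because duplicates are removed via a set of tuples:

from olive.auto_optimizer.template_mapping import remove_incompatible_passes

flows = [
    ["ModelBuilder", "OrtTransformersOptimization", "OnnxMatMul4Quantizer", "OrtPerfTuning"],
    ["OnnxConversion", "OrtTransformersOptimization", "OrtPerfTuning"],
]
print(remove_incompatible_passes(flows))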


def get_pass_flows_by_accelerator_ep_precision(opt_level, accelerator, ep, precision, excluded_passes=None):
ep_literal = "ExecutionProvider"
ep = ep[: -len(ep_literal)].lower() if ep.endswith(ep_literal) else ep.lower()
precision = precision.lower()

available_passes_tree = get_available_passes_by_opt_level(opt_level)
passes_tree = get_available_passes_by_opt_level(opt_level)
excluded_passes = excluded_passes or []
available_passes_tree = []
for pass_level in passes_tree:
filtered_passes = [p for p in pass_level if p not in excluded_passes]
if not filtered_passes:
continue
available_passes_tree.append(filtered_passes)

passes_cap = get_pass_capability()
pass_flows = []

# given available_passes_tree is [a] -> [b, c] -> [d, e, f], generate all possible pass flows
# [a, b, d], [a, b, e], [a, b, f], [a, c, d], [a, c, e], [a, c, f]

# as we need to step over some intermediate passes, we cannot use len(pass_flow_candidate) to
# indicate the current pass level, instead, we use the length of available_passes_tree to indicate
# item in pass stack is (pass, depth, pass_flow_candidate)
# indicate the current pass depth; instead, we track the current depth within available_passes_tree.

# each item in the pass deque is (depth, pass_flow_candidate)
pass_deque = deque([(-1, [])])
max_depth = len(available_passes_tree)
while pass_deque:
depth, pf_candidate = pass_deque.popleft()

# strong rule: once a candidate reaches the last pass level, it is a complete pass flow
if depth == len(available_passes_tree) - 1:
if depth == max_depth - 1:
pass_flows.append(pf_candidate)
continue

# if we don't have any pass in next level, we cannot step over it
keep_try = True
for next_level in range(depth + 1, len(available_passes_tree)):
for next_level in range(depth + 1, max_depth):
if keep_try:
for p_next in available_passes_tree[next_level]:
if _if_match_pass_capability(p_next, passes_cap, accelerator, ep, precision):
pass_deque.append((next_level, [*pf_candidate, p_next]))
# if we find one pass in next_level, break the outer loop
keep_try = False
# did not find any pass in next level, we cannot step over it
if keep_try:
# push back and increase depth
pass_deque.append((next_level, pf_candidate))
# not `elif` here, as we need to check special case for fp16
if not keep_try:
if precision == "fp16" and len(available_passes_tree[next_level]) > 1:
if precision == "fp16" and len(pf_candidate) > 1 and pf_candidate[-1] == "OrtTransformersOptimization":
# for fp16, we can also step over to next level + 1 even if we find a pass in the next level
# e.g. we need to suggest both convert -> transformers opt -> mixed precision -> perf tuning
# and convert -> transformers opt -> perf tuning
keep_try = True
else:
break

return pass_flows
return remove_incompatible_passes(pass_flows)
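Example invocation (a sketch only: the exact flows returned depend on the YAML templates shipped with the installed Olive version, and the excluded pass is purely illustrative):

from olive.auto_optimizer.template_mapping import get_pass_flows_by_accelerator_ep_precision

flows = get_pass_flows_by_accelerator_ep_precision(
    opt_level=0,
    accelerator="gpu",
    ep="CUDAExecutionProvider",
    precision="fp16",
    excluded_passes=["VitisAIQuantization"],  # illustrative exclusion
)
# each element is a list of pass names, e.g.
# ["OnnxConversion", "OrtTransformersOptimization", "OrtMixedPrecision", "OrtPerfTuning"]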


def _if_match_pass_capability(p, passes_cap, accelerator, ep, precision):
