🗂 Source Map
- mlflow/entities/run.py — Run entity
- mlflow/tracking/fluent.py — Fluent API
- mlflow/tracking/client.py — MlflowClient
- mlflow/store/tracking/sqlalchemy_store.py — Backend store
- mlflow/models/model.py — Model class
- mlflow/pyfunc/__init__.py — PyFuncModel
- mlflow/utils/autologging_utils/__init__.py — Autologging
Getting Started
pip install mlflowimport mlflow
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
mlflow.set_experiment("iris-classification")
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
with mlflow.start_run():
params = {"solver": "lbfgs", "max_iter": 1000}
mlflow.log_params(params)
model = LogisticRegression(**params)
model.fit(X_train, y_train)
acc = accuracy_score(y_test, model.predict(X_test))
mlflow.log_metric("accuracy", acc)
mlflow.sklearn.log_model(model, name="iris_model")Source Code Walkthrough
Run Entity — The Atomic Unit of Tracking
The Run class composes RunInfo, RunData, RunInputs, and RunOutputs. Uses composition over inheritance, with each component independently serializable via protobuf.
class Run(_MlflowObject):
def __init__(self, run_info, run_data,
run_inputs=None, run_outputs=None):
if run_info is None:
raise MlflowException("run_info cannot be None")
self._info = run_info
self._data = run_data
self._inputs = run_inputs
self._outputs = run_outputs
@property
def info(self):
"""The run metadata: id, start time, status."""
return self._info
@property
def data(self):
"""Metrics, parameters, and tags."""
return self._dataFluent API — Thread-Local Run Management
The fluent API manages a thread-local run stack. _get_or_start_run() auto-creates a run if none is active.
def start_run(
run_id=None, experiment_id=None,
run_name=None, nested=False,
tags=None, description=None,
) -> ActiveRun:
"""Start a new MLflow run, setting it as the active run."""
# Creates or resumes a run, pushes onto thread-local stack
def log_param(key, value, synchronous=None):
"""Log a parameter under the current run."""
run_id = _get_or_start_run().info.run_id
MlflowClient().log_param(run_id, key, value)MlflowClient — The Explicit API
Wraps TrackingServiceClient and ModelRegistryClient. Resolves tracking URI from argument, env var, or default.
class MlflowClient:
def __init__(self, tracking_uri=None, registry_uri=None):
self._tracking_client = TrackingServiceClient(uri)
self._registry_client = ModelRegistryClient(registry_uri)
def log_metric(self, run_id, key, value, **kwargs):
self._tracking_client.log_metric(run_id, key, value, **kwargs)
def search_runs(self, experiment_ids, filter_string="", **kwargs):
return self._tracking_client.search_runs(...)SQLAlchemy Backend Store
Class-level engine cache prevents connection pool leaks. Dual-writes to metrics (history) and latest_metrics (fast queries).
class SqlAlchemyStore(AbstractStore):
_engine_map = {} # Class-level cache
_engine_map_lock = threading.Lock()
def log_metric(self, run_id, metric):
with self.ManagedSessionMaker() as session:
metric.value = sanitize_metric_value(metric.value)
session.add(SqlMetric(run_uuid=run_id, **metric.to_dict()))
self._update_latest_metric(session, run_id, metric)Model Class — The MLmodel Format
Defines the flavor system. add_flavor() is called by framework-specific log_model functions. The pyfunc flavor is always present.
class Model:
MLMODEL_FILE_NAME = "MLmodel"
def __init__(self, flavors=None, signature=None, ...):
self.flavors = flavors if flavors else {}
self.model_uuid = str(uuid.uuid4())
def add_flavor(self, name, **params):
"""Add a flavor (e.g. sklearn, python_function)."""
self.flavors[name] = params
return self
def save(self, path):
"""Write MLmodel YAML to the given directory."""
with open(os.path.join(path, self.MLMODEL_FILE_NAME), "w") as f:
self.to_yaml(f)PyFuncModel — Universal Inference Interface
Wraps any model with a consistent predict() interface. Accepts DataFrames, arrays, lists, and dicts.
class PyFuncModel:
def __init__(self, model_meta, model_impl,
predict_fn="predict"):
self._model_meta = model_meta
self._model_impl = model_impl
def predict(self, data, params=None):
"""Generate predictions from any MLflow model."""
fn = getattr(self._model_impl, self._predict_fn)
return fn(data)Autologging Integration — Safe Monkey-Patching
The autologging_integration decorator registers integrations and manages configuration with thread-safe locks.
def autologging_integration(name):
def wrapper(_autolog):
validate_param_spec(param_spec)
AUTOLOGGING_INTEGRATIONS[name] = {}
@autologging_conf_lock
def autolog(*args, **kwargs):
AUTOLOGGING_INTEGRATIONS[name] = config
revert_patches(name) # Prevent patch stacking
return _autolog(*args, **kwargs)
return wrapped_autolog
return wrapper