Source code for pyspark.ml.param

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from abc import ABCMeta
import copy

from pyspark import since
from pyspark.ml.util import Identifiable


__all__ = ['Param', 'Params']


[docs]class Param(object): """ A param with self-contained documentation. .. versionadded:: 1.3.0 """ def __init__(self, parent, name, doc): if not isinstance(parent, Identifiable): raise TypeError("Parent must be an Identifiable but got type %s." % type(parent)) self.parent = parent.uid self.name = str(name) self.doc = str(doc) def __str__(self): return str(self.parent) + "__" + self.name def __repr__(self): return "Param(parent=%r, name=%r, doc=%r)" % (self.parent, self.name, self.doc) def __hash__(self): return hash(str(self)) def __eq__(self, other): if isinstance(other, Param): return self.parent == other.parent and self.name == other.name else: return False
[docs]class Params(Identifiable): """ Components that take parameters. This also provides an internal param map to store parameter values attached to the instance. .. versionadded:: 1.3.0 """ __metaclass__ = ABCMeta def __init__(self): super(Params, self).__init__() #: internal param map for user-supplied values param map self._paramMap = {} #: internal param map for default values self._defaultParamMap = {} #: value returned by :py:func:`params` self._params = None @property @since("1.3.0") def params(self): """ Returns all params ordered by name. The default implementation uses :py:func:`dir` to get all attributes of type :py:class:`Param`. """ if self._params is None: self._params = list(filter(lambda attr: isinstance(attr, Param), [getattr(self, x) for x in dir(self) if x != "params"])) return self._params @since("1.4.0")
[docs] def explainParam(self, param): """ Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string. """ param = self._resolveParam(param) values = [] if self.isDefined(param): if param in self._defaultParamMap: values.append("default: %s" % self._defaultParamMap[param]) if param in self._paramMap: values.append("current: %s" % self._paramMap[param]) else: values.append("undefined") valueStr = "(" + ", ".join(values) + ")" return "%s: %s %s" % (param.name, param.doc, valueStr)
@since("1.4.0")
[docs] def explainParams(self): """ Returns the documentation of all params with their optionally default values and user-supplied values. """ return "\n".join([self.explainParam(param) for param in self.params])
@since("1.4.0")
[docs] def getParam(self, paramName): """ Gets a param by its name. """ param = getattr(self, paramName) if isinstance(param, Param): return param else: raise ValueError("Cannot find param with name %s." % paramName)
@since("1.4.0")
[docs] def isSet(self, param): """ Checks whether a param is explicitly set by user. """ param = self._resolveParam(param) return param in self._paramMap
@since("1.4.0")
[docs] def hasDefault(self, param): """ Checks whether a param has a default value. """ param = self._resolveParam(param) return param in self._defaultParamMap
@since("1.4.0")
[docs] def isDefined(self, param): """ Checks whether a param is explicitly set by user or has a default value. """ return self.isSet(param) or self.hasDefault(param)
@since("1.4.0")
[docs] def hasParam(self, paramName): """ Tests whether this instance contains a param with a given (string) name. """ param = self._resolveParam(paramName) return param in self.params
@since("1.4.0")
[docs] def getOrDefault(self, param): """ Gets the value of a param in the user-supplied param map or its default value. Raises an error if neither is set. """ param = self._resolveParam(param) if param in self._paramMap: return self._paramMap[param] else: return self._defaultParamMap[param]
@since("1.4.0")
[docs] def extractParamMap(self, extra=None): """ Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < user-supplied values < extra. :param extra: extra param values :return: merged param map """ if extra is None: extra = dict() paramMap = self._defaultParamMap.copy() paramMap.update(self._paramMap) paramMap.update(extra) return paramMap
@since("1.4.0")
[docs] def copy(self, extra=None): """ Creates a copy of this instance with the same uid and some extra params. The default implementation creates a shallow copy using :py:func:`copy.copy`, and then copies the embedded and extra parameters over and returns the copy. Subclasses should override this method if the default approach is not sufficient. :param extra: Extra parameters to copy to the new instance :return: Copy of this instance """ if extra is None: extra = dict() that = copy.copy(self) that._paramMap = self.extractParamMap(extra) return that
def _shouldOwn(self, param): """ Validates that the input param belongs to this Params instance. """ if not (self.uid == param.parent and self.hasParam(param.name)): raise ValueError("Param %r does not belong to %r." % (param, self)) def _resolveParam(self, param): """ Resolves a param and validates the ownership. :param param: param name or the param instance, which must belong to this Params instance :return: resolved param instance """ if isinstance(param, Param): self._shouldOwn(param) return param elif isinstance(param, str): return self.getParam(param) else: raise ValueError("Cannot resolve %r as a param." % param) @staticmethod def _dummy(): """ Returns a dummy Params instance used as a placeholder to generate docs. """ dummy = Params() dummy.uid = "undefined" return dummy def _set(self, **kwargs): """ Sets user-supplied params. """ for param, value in kwargs.items(): self._paramMap[getattr(self, param)] = value return self def _setDefault(self, **kwargs): """ Sets default params. """ for param, value in kwargs.items(): self._defaultParamMap[getattr(self, param)] = value return self def _copyValues(self, to, extra=None): """ Copies param values from this instance to another instance for params shared by them. :param to: the target instance :param extra: extra params to be copied :return: the target instance with param values copied """ if extra is None: extra = dict() paramMap = self.extractParamMap(extra) for p in self.params: if p in paramMap and to.hasParam(p.name): to._set(**{p.name: paramMap[p]}) return to