Question

我正在创建一些类来处理各种类型的文件共享（nfs，afp，s3，本地磁盘）等文件名。我在用户输入中获得一个标识数据源的字符串（即"nfs://192.168.1.3"或"s3://mybucket/data"）等。

我从具有公共代码的基类继承特定文件系统。我困惑的地方是对象创建。我所拥有的是以下内容：

import os

class FileSystem(object):
    """Base class for file-share handlers that also acts as a factory.

    ``FileSystem(path)`` returns an instance of the concrete subclass chosen
    from the scheme at the start of *path*; instantiating a subclass
    directly creates that subclass.
    """

    class NoAccess(Exception):
        """Raised by handlers when the requested location cannot be read."""

    def __new__(cls, path):
        """Allocate the instance, picking the subclass when called on the base.

        Args:
            path: User-supplied source identifier such as
                ``'nfs://192.168.1.3'`` or ``'/var/log'``.

        Returns:
            A new, not yet initialised instance. When ``cls`` is
            ``FileSystem`` this is an ``Nfs`` for paths starting with
            ``nfs://`` (compared case-insensitively) and a ``LocalDrive``
            otherwise; for any subclass it is an instance of that subclass.
        """
        if cls is FileSystem:
            if path.upper().startswith('NFS://'):
                target = Nfs
            else:
                target = LocalDrive
        else:
            target = cls
        # Do not forward ``path``: once __new__ is overridden,
        # object.__new__ rejects extra arguments (TypeError on Python 3).
        return super(FileSystem, cls).__new__(target)

    def count_files(self):
        """Return the number of files at this location (subclasses override)."""
        raise NotImplementedError

class Nfs(FileSystem):
    """Placeholder handler for NFS sources; nothing is implemented yet."""

    def __init__(self, path):
        """Accept *path* without storing or validating it."""
        return None

    def count_files(self):
        """Stub: performs no work and returns ``None``."""
        return None

class LocalDrive(FileSystem):
    """Handler for a directory on the local disk."""

    def __init__(self, path):
        """Remember *path* after checking that it is readable.

        Raises:
            FileSystem.NoAccess: if ``os.access(path, os.R_OK)`` is false.
        """
        readable = os.access(path, os.R_OK)
        if not readable:
            raise FileSystem.NoAccess('Cannot read directory')
        self.path = path

    def count_files(self):
        """Return how many entries directly inside ``self.path`` pass
        ``os.path.isfile``.
        """
        total = 0
        for entry in os.listdir(self.path):
            if os.path.isfile(os.path.join(self.path, entry)):
                total += 1
        return total

# Let the base class choose the concrete handler from each source string.
data1 = FileSystem('nfs://192.168.1.18')
data2 = FileSystem('/var/log')

# print() with a single argument behaves the same on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(type(data1))
print(type(data2))

print(data2.count_files())

我认为这可以很好地利用__new__，但我读到的大多数帖子都会阻止它。是否有更可接受的方法来解决这个问题？

Answer 1

我不认为使用__new__()做你想做的事是不合适的。换句话说，我不同意这个question的接受答案，该答案声称工厂函数始终是"最好的方式"。

如果你真的想避免使用它，那么唯一的选择是元类或单独的工厂函数/方法。鉴于可用的选择，让__new__()方法本身充当工厂——因为它默认就是静态方法——是一种非常明智的做法。

话虽如此，下面是我认为的代码改进版本。我添加了几个类方法来帮助自动查找所有子类。它们支撑了这个版本最重要的改进：现在添加子类不再需要修改__new__()方法。这意味着它现在可以轻松扩展，因为它实际上支持了你可以称之为"虚拟构造函数"的东西。

也可以使用类似的实现将实例的创建从__new__方法移动到单独的（静态）工厂方法中——因此在某种意义上，所示的技术只是编写可扩展的通用工厂函数的一种相对简单的方法，无论给它起什么名字。

import os
import re

class FileSystem(object):
    """Base class and factory for file-system handlers keyed by path prefix.

    ``FileSystem(path)`` returns an instance of the subclass whose ``prefix``
    class attribute equals the lowercased ``xxx`` of a leading ``xxx://`` in
    *path* (``None`` when the path has no such prefix). Instantiating a
    concrete subclass directly creates that subclass.
    """

    class NoAccess(Exception):
        """Raised by handlers when the location cannot be read."""

    class Unknown(Exception):
        """Raised when no handler exists for a path's prefix."""

    # Pattern for matching "xxx://" where x is any character except for ":".
    _PATH_PREFIX_PATTERN = re.compile(r'\s*([^:]+)://')

    @classmethod
    def _get_all_subclasses(cls):
        """ Recursive generator of all class' subclasses. """
        for subclass in cls.__subclasses__():
            yield subclass
            for descendant in subclass._get_all_subclasses():
                yield descendant

    @classmethod
    def _get_prefix(cls, s):
        """ Extract any file system prefix at beginning of string s and
            return a lowercase version of it or None when there isn't one.
        """
        match = cls._PATH_PREFIX_PATTERN.match(s)
        return match.group(1).lower() if match else None

    def __new__(cls, path):
        """ Create instance of appropriate subclass using path prefix.

            The descendants of cls are searched first. When none of them
            matches and cls is itself a subclass (it was instantiated
            directly), an instance of cls is created. Only the base class
            raises FileSystem.Unknown for an unmatched prefix.
        """
        path_prefix = cls._get_prefix(path)

        for subclass in cls._get_all_subclasses():
            # Subclasses that declare no prefix simply never match instead
            # of aborting the search with AttributeError.
            if hasattr(subclass, 'prefix') and subclass.prefix == path_prefix:
                # Using "object" base class method avoids recursion here.
                return object.__new__(subclass)

        if cls is not FileSystem:
            # A concrete handler was requested explicitly; honour that
            # rather than failing because it has no matching descendants.
            return object.__new__(cls)

        # No subclass with matching prefix found (and no default defined).
        raise FileSystem.Unknown(
            'path "{}" has no known file system prefix'.format(path))

    def count_files(self):
        """ Return the number of files at this location (subclasses override). """
        raise NotImplementedError


class Nfs(FileSystem):
    """Stub handler whose ``prefix`` selects it for ``nfs://`` paths."""

    prefix = 'nfs'

    def __init__(self, path):
        """Accept *path*; nothing is stored or validated."""
        return None

    def count_files(self):
        """Not implemented: always returns ``None``."""
        return None


class LocalDrive(FileSystem):
    """Handler for local directories; chosen when a path has no prefix."""

    prefix = None  # Default when no file system prefix is found.

    def __init__(self, path):
        """Store *path* once ``os.access`` confirms it is readable.

        Raises:
            FileSystem.NoAccess: if the path is not readable.
        """
        readable = os.access(path, os.R_OK)
        if not readable:
            raise FileSystem.NoAccess('Cannot read directory')
        self.path = path

    def count_files(self):
        """Count the entries directly under ``self.path`` that are files."""
        files = [name for name in os.listdir(self.path)
                 if os.path.isfile(os.path.join(self.path, name))]
        return len(files)


if __name__ == '__main__':

    # Demo: instantiating the base class dispatches on the path prefix.
    data1 = FileSystem('nfs://192.168.1.18')
    data2 = FileSystem('c:/')  # Change as necessary for testing.

    print(type(data1))  # -> <class '__main__.Nfs'>
    print(type(data2))  # -> <class '__main__.LocalDrive'>

    # Number of files directly inside the local directory.
    print(data2.count_files())  # -> <some number>

Answer 2

在我看来，以这种方式使用__new__对于可能阅读您的代码的其他人来说真的很困惑。此外，它需要一些hackish代码来区分猜测文件系统和用户输入，并创建Nfs和LocalDrive及其相应的类。

为什么不使用此行为创建单独的函数？它甚至可以是FileSystem类的静态方法：

class FileSystem(object):
    # other code ...

    @staticmethod
    def from_path(path):
        """Build the handler that matches *path*.

        Returns an ``Nfs`` when *path* starts with ``nfs://`` (compared
        case-insensitively via ``upper()``), otherwise a ``LocalDrive``.
        """
        is_nfs = path.upper().startswith('NFS://')
        handler = Nfs if is_nfs else LocalDrive
        return handler(path)

你这样称呼它：

# Callers go through the named factory instead of instantiating FileSystem.
data1 = FileSystem.from_path('nfs://192.168.1.18')
data2 = FileSystem.from_path('/var/log')

Answer 3

编辑[BLUF]：@martineau提供的答案没有问题，本文仅是后续补充，用于讨论在类定义中使用不由元类处理的额外关键字时可能遇到的潜在错误。

我想提供一些有关__init_subclass__与工厂__new__结合使用的信息。 @martineau发布的答案非常有用，我在自己的程序中实现了它的更改版本，因为我更喜欢使用类创建顺序，而不是在名称空间中添加工厂方法。与pathlib.Path的实施方式非常相似。

为了跟进我对@martineau的评论，我从他的回答中摘录了以下片段：

import os
import re

class FileSystem(object):
    """Base class and factory for file-system handlers.

    Subclasses register themselves under the ``path_prefix`` keyword given
    in their class statement (``None`` marks the default handler).
    ``FileSystem(path)`` returns an instance of the registered subclass for
    the path's prefix; instantiating a subclass directly creates exactly
    that subclass.
    """

    class NoAccess(Exception):
        """Raised by handlers when the location cannot be read."""

    class Unknown(Exception):
        """Raised when no handler is registered for a path's prefix."""

    # Regex for matching a leading "xxx://" where x is any character except
    # ":" (optional leading whitespace is skipped by the \s* part).
    _PATH_PREFIX_PATTERN = re.compile(r'\s*([^:]+)://')
    _registry = {}  # Registered subclasses.

    @classmethod
    def __init_subclass__(cls, /, **kwargs):
        """Register the new subclass under its ``path_prefix`` keyword."""
        # pop() removes the keyword so the kwargs forwarded up the MRO do
        # not reach object.__init_subclass__, which accepts none.
        path_prefix = kwargs.pop('path_prefix', None)
        super().__init_subclass__(**kwargs)
        cls._registry[path_prefix] = cls  # Add class to registry.

    @classmethod
    def _get_prefix(cls, s):
        """ Extract any file system prefix at beginning of string s and
            return a lowercase version of it or None when there isn't one.
        """
        match = cls._PATH_PREFIX_PATTERN.match(s)
        return match.group(1).lower() if match else None

    def __new__(cls, path):
        """ Create instance of appropriate subclass.

            Raises FileSystem.Unknown when the base class is called with a
            path whose prefix has no registered subclass.
        """
        if cls is not FileSystem:
            # A concrete handler was requested directly. Dispatching through
            # the registry here could return an instance of an unrelated
            # class, and Python skips __init__ when __new__ returns an
            # object that is not an instance of cls.
            return object.__new__(cls)

        path_prefix = cls._get_prefix(path)
        subclass = FileSystem._registry.get(path_prefix)
        if subclass is None:
            # No subclass with matching prefix found (and no default).
            raise FileSystem.Unknown(
                f'path "{path}" has no known file system prefix')
        # Using "object" base class method avoids recursion here.
        return object.__new__(subclass)

    def count_files(self):
        """ Return the number of files at this location (subclasses override). """
        raise NotImplementedError


class Nfs(FileSystem, path_prefix='nfs'):
    """Stub handler declared with the ``nfs`` path prefix."""

    def __init__(self, path):
        """Accept *path*; nothing is stored or validated."""
        return None

    def count_files(self):
        """Not implemented: always returns ``None``."""
        return None


class LocalDrive(FileSystem, path_prefix=None):  # Default file system.
    """Handler for local directories, declared with no path prefix."""

    def __init__(self, path):
        """Store *path* once ``os.access`` confirms it is readable.

        Raises:
            FileSystem.NoAccess: if the path is not readable.
        """
        can_read = os.access(path, os.R_OK)
        if not can_read:
            raise FileSystem.NoAccess('Cannot read directory')
        self.path = path

    def count_files(self):
        """Count the entries directly under ``self.path`` that are files."""
        count = 0
        for filename in os.listdir(self.path):
            if os.path.isfile(os.path.join(self.path, filename)):
                count += 1
        return count


if __name__ == '__main__':

    # Demo: instantiating the base class dispatches on the path prefix.
    data1 = FileSystem('nfs://192.168.1.18')
    data2 = FileSystem('c:/')  # Change as necessary for testing.

    print(type(data1).__name__)  # -> Nfs
    print(type(data2).__name__)  # -> LocalDrive

    print(data2.count_files())  # -> <some number>

    # A prefix with no registered handler must raise FileSystem.Unknown;
    # reaching the else branch means the factory accepted it by mistake.
    try:
        data3 = FileSystem('foobar://42')  # Unregistered path prefix.
    except FileSystem.Unknown as exc:
        print(str(exc), '- raised as expected')
    else:
        raise RuntimeError(
              "Unregistered path prefix should have raised Exception!")

这个答案照原样就能正常工作，但我想说明其他人可能由于经验不足或团队要求的代码库标准而遇到的一些问题（潜在的陷阱）。

首先，对于__init_subclass__上的装饰器，按照PEP：

本可以要求在__init_subclass__上显式使用@classmethod装饰器。之所以将其设为隐式的，是因为省略它并没有合理的解释，而且无论如何都需要检测这种情况才能给出有用的错误消息。

由于它已经是隐式的，所以这不成问题，尽管Python之禅告诉我们"显式优于隐式"。不过，既然要遵循PEP，那就照此办理（PEP中也进一步解释了其理由）。

在我自己的类似解决方案的实现中，未使用其他关键字参数来定义子类，例如@martineau在此处进行操作：

# The prefix is supplied as a class keyword argument in the class statement.
class Nfs(FileSystem, path_prefix='nfs'): ...
class LocalDrive(FileSystem, path_prefix=None): ...

浏览PEP时：

第二个更改是，新的type.__init__会直接忽略关键字参数。当前，它坚持不得提供关键字参数。如果向类声明提供了关键字参数而元类不处理它们，这会导致一个（预期的）错误。确实希望接受关键字参数的元类作者必须通过覆盖__init__来过滤掉它们。

为什么这个（潜在地）有问题？好了，有几个问题（特别是this）描述了有关类定义中其他关键字参数，元类的使用（随后为metaclass=关键字）和覆盖的__init_subclass__的问题。但是，这并不能解释为什么它可以在当前给定的解决方案中工作。答案：kwargs.pop()。

如果我们看以下内容：

# code in CPython 3.7

import os
import re

class FileSystem(object):
    class NoAccess(Exception): pass
    class Unknown(Exception): pass

    # Regex for matching a leading "xxx://" where x is any character except
    # ":" (optional leading whitespace is skipped by the \s* part).
    _PATH_PREFIX_PATTERN = re.compile(r'\s*([^:]+)://')
    _registry = {}  # Registered subclasses.

    def __init_subclass__(cls, **kwargs):
        # pop() removes 'path_prefix' from kwargs, so what is forwarded to
        # the next __init_subclass__ in the MRO no longer contains it.
        path_prefix = kwargs.pop('path_prefix', None)
        super().__init_subclass__(**kwargs)
        cls._registry[path_prefix] = cls  # Add class to registry.

    ...

class Nfs(FileSystem, path_prefix='nfs'): ...

这仍然可以正常运行，但是如果我们删除kwargs.pop()：

    def __init_subclass__(cls, **kwargs):
        # Deliberately broken variant: 'path_prefix' is still in kwargs when
        # they are forwarded, and no local named path_prefix is bound.
        super().__init_subclass__(**kwargs)  # throws TypeError
        cls._registry[path_prefix] = cls  # Add class to registry.

在PEP中已经知道并描述了抛出的错误：

在新代码中，不是__init__抱怨关键字参数，而是__init_subclass__，其默认实现不带参数。在使用方法解析顺序的经典继承方案中，每个__init_subclass__都可以取出其关键字参数，直到没有剩下的为止，这由__init_subclass__的默认实现检查。

正在发生的事情是path_prefix=关键字被从kwargs中弹出，而不仅仅是被访问，所以**kwargs现在为空并向上传递到MRO，从而符合默认实现（不接收关键字参数）。

为完全避免这种情况，我建议不要依赖kwargs，而应使用对__init_subclass__的调用中已经存在的内容，即cls参考：

# code in CPython 3.7

import os
import re

class FileSystem(object):
    class NoAccess(Exception): pass
    class Unknown(Exception): pass

    # Regex for matching a leading "xxx://" where x is any character except
    # ":" (optional leading whitespace is skipped by the \s* part).
    _PATH_PREFIX_PATTERN = re.compile(r'\s*([^:]+)://')
    _registry = {}  # Registered subclasses.

    def __init_subclass__(cls, **kwargs):
        """Register the new subclass under its ``_path_prefix`` attribute.

        A subclass that does not declare ``_path_prefix`` is registered
        under ``None`` instead of failing with AttributeError, matching the
        ``None`` default of the keyword-based variant.
        """
        super().__init_subclass__(**kwargs)
        # Add class to registry.
        cls._registry[getattr(cls, '_path_prefix', None)] = cls

    ...

class Nfs(FileSystem):
    # Class attribute used as this subclass's path prefix.
    _path_prefix = 'nfs'

    ...

把之前的关键字改为类属性还有一个好处：如果需要引用子类使用的特定前缀，以后的方法可以通过self._path_prefix访问它。据我所知，您无法（在不增加复杂性的情况下）在类定义内部引用传入的关键字，而这样做看起来既简单又有用。

因此，对于@martineau，我为我的评论感到困惑表示歉意，因为只有这么多的空间可以键入它们，并且显示得更加详细。

不正确地使用new来生成类？

3 个答案:

不正确地使用__new__来生成类？

3 个答案:

不正确地使用new来生成类？