sirfz/tesserocr

(Parallelization) TypeError: no default __reduce__ due to non-trivial __cinit__

Open

#204 建立於 2019年11月19日

在 GitHub 查看
 (3 留言) (0 反應) (0 負責人)Python (1,896 star) (257 fork)batch import
help wanted

描述

I was trying to set up Dask to parallelize OCR across multiple documents; however, the `PyTessBaseAPI` class does not appear to be picklable. Is this something that could be implemented in the future, or is there a supported alternative for multiprocessing that I am missing?

TypeError                                 Traceback (most recent call last)
<ipython-input-8-eed971444e6e> in <module>
----> 1 client.map(t.extract, ["samples/statement.pdf"])

//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/client.py in map(self, func, key, workers, retries, resources, priority, allow_other_workers, fifo_timeout, actor, actors, pure, *iterables, **kwargs)
   1672             user_priority=priority,
   1673             fifo_timeout=fifo_timeout,
-> 1674             actors=actor,
   1675         )
   1676         logger.debug("map(%s, ...)", funcname(func))

//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/client.py in _graph_to_futures(self, dsk, keys, restrictions, loose_restrictions, priority, user_priority, resources, retries, fifo_timeout, actors)
   2486                 {
   2487                     "op": "update-graph",
-> 2488                     "tasks": valmap(dumps_task, dsk3),
   2489                     "dependencies": dependencies,
   2490                     "keys": list(flatkeys),

//anaconda3/envs/eve/lib/python3.7/site-packages/cytoolz/dicttoolz.pyx in cytoolz.dicttoolz.valmap()

//anaconda3/envs/eve/lib/python3.7/site-packages/cytoolz/dicttoolz.pyx in cytoolz.dicttoolz.valmap()

//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/worker.py in dumps_task(task)
   3236             return d
   3237         elif not any(map(_maybe_complex, task[1:])):
-> 3238             return {"function": dumps_function(task[0]), "args": warn_dumps(task[1:])}
   3239     return to_serialize(task)
   3240

//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/worker.py in dumps_function(func)
   3201         result = cache[func]
   3202     except KeyError:
-> 3203         result = pickle.dumps(func)
   3204         if len(result) < 100000:
   3205             cache[func] = result

//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/protocol/pickle.py in dumps(x)
     49     except Exception:
     50         try:
---> 51             return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
     52         except Exception as e:
     53             logger.info("Failed to serialize %s. Exception: %s", x, e)

//anaconda3/envs/eve/lib/python3.7/site-packages/cloudpickle/cloudpickle.py in dumps(obj, protocol)
   1123     try:
   1124         cp = CloudPickler(file, protocol=protocol)
-> 1125         cp.dump(obj)
   1126         return file.getvalue()
   1127     finally:

//anaconda3/envs/eve/lib/python3.7/site-packages/cloudpickle/cloudpickle.py in dump(self, obj)
    480         self.inject_addons()
    481         try:
--> 482             return Pickler.dump(self, obj)
    483         except RuntimeError as e:
    484             if 'recursion' in e.args[0]:

//anaconda3/envs/eve/lib/python3.7/pickle.py in dump(self, obj)
    435         if self.proto >= 4:
    436             self.framer.start_framing()
--> 437         self.save(obj)
    438         self.write(STOP)
    439         self.framer.end_framing()

//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
    502         f = self.dispatch.get(t)
    503         if f is not None:
--> 504             f(self, obj) # Call unbound method with explicit self
    505             return
    506

//anaconda3/envs/eve/lib/python3.7/site-packages/cloudpickle/cloudpickle.py in save_instancemethod(self, obj)
    888         else:
    889             if PY3:  # pragma: no branch
--> 890                 self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj)
    891             else:
    892                 self.save_reduce(

//anaconda3/envs/eve/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    636         else:
    637             save(func)
--> 638             save(args)
    639             write(REDUCE)
    640

//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
    502         f = self.dispatch.get(t)
    503         if f is not None:
--> 504             f(self, obj) # Call unbound method with explicit self
    505             return
    506

//anaconda3/envs/eve/lib/python3.7/pickle.py in save_tuple(self, obj)
    772         if n <= 3 and self.proto >= 2:
    773             for element in obj:
--> 774                 save(element)
    775             # Subtle.  Same as in the big comment below.
    776             if id(obj) in memo:

//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
    547
    548         # Save the reduce() output and finally memoize the object
--> 549         self.save_reduce(obj=obj, *rv)
    550
    551     def persistent_id(self, obj):

//anaconda3/envs/eve/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    660
    661         if state is not None:
--> 662             save(state)
    663             write(BUILD)
    664

//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
    502         f = self.dispatch.get(t)
    503         if f is not None:
--> 504             f(self, obj) # Call unbound method with explicit self
    505             return
    506

//anaconda3/envs/eve/lib/python3.7/pickle.py in save_dict(self, obj)
    857
    858         self.memoize(obj)
--> 859         self._batch_setitems(obj.items())
    860
    861     dispatch[dict] = save_dict

//anaconda3/envs/eve/lib/python3.7/pickle.py in _batch_setitems(self, items)
    883                 for k, v in tmp:
    884                     save(k)
--> 885                     save(v)
    886                 write(SETITEMS)
    887             elif n:

//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
    522             reduce = getattr(obj, "__reduce_ex__", None)
    523             if reduce is not None:
--> 524                 rv = reduce(self.proto)
    525             else:
    526                 reduce = getattr(obj, "__reduce__", None)

//anaconda3/envs/eve/lib/python3.7/site-packages/tesserocr.cpython-37m-darwin.so in tesserocr.PyTessBaseAPI.__reduce_cython__()

TypeError: no default __reduce__ due to non-trivial __cinit__

貢獻者指南