sirfz/tesserocr
View on GitHub
(Parallelization) TypeError: no default __reduce__ due to non-trivial __cinit__
Open
#204 opened on Nov 19, 2019
help wanted
Description
I was trying to set up Dask to parallelize OCR across multiple documents; however, it seems that the PyTessBaseAPI class is not picklable. Is this something that could be implemented in the future, or is there a supported alternative for multiprocessing that I am missing?
TypeError Traceback (most recent call last)
<ipython-input-8-eed971444e6e> in <module>
----> 1 client.map(t.extract, ["samples/statement.pdf"])
//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/client.py in map(self, func, key, workers, retries, resources, priority, allow_other_workers, fifo_timeout, actor, actors, pure, *iterables, **kwargs)
1672 user_priority=priority,
1673 fifo_timeout=fifo_timeout,
-> 1674 actors=actor,
1675 )
1676 logger.debug("map(%s, ...)", funcname(func))
//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/client.py in _graph_to_futures(self, dsk, keys, restrictions, loose_restrictions, priority, user_priority, resources, retries, fifo_timeout, actors)
2486 {
2487 "op": "update-graph",
-> 2488 "tasks": valmap(dumps_task, dsk3),
2489 "dependencies": dependencies,
2490 "keys": list(flatkeys),
//anaconda3/envs/eve/lib/python3.7/site-packages/cytoolz/dicttoolz.pyx in cytoolz.dicttoolz.valmap()
//anaconda3/envs/eve/lib/python3.7/site-packages/cytoolz/dicttoolz.pyx in cytoolz.dicttoolz.valmap()
//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/worker.py in dumps_task(task)
3236 return d
3237 elif not any(map(_maybe_complex, task[1:])):
-> 3238 return {"function": dumps_function(task[0]), "args": warn_dumps(task[1:])}
3239 return to_serialize(task)
3240
//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/worker.py in dumps_function(func)
3201 result = cache[func]
3202 except KeyError:
-> 3203 result = pickle.dumps(func)
3204 if len(result) < 100000:
3205 cache[func] = result
//anaconda3/envs/eve/lib/python3.7/site-packages/distributed/protocol/pickle.py in dumps(x)
49 except Exception:
50 try:
---> 51 return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
52 except Exception as e:
53 logger.info("Failed to serialize %s. Exception: %s", x, e)
//anaconda3/envs/eve/lib/python3.7/site-packages/cloudpickle/cloudpickle.py in dumps(obj, protocol)
1123 try:
1124 cp = CloudPickler(file, protocol=protocol)
-> 1125 cp.dump(obj)
1126 return file.getvalue()
1127 finally:
//anaconda3/envs/eve/lib/python3.7/site-packages/cloudpickle/cloudpickle.py in dump(self, obj)
480 self.inject_addons()
481 try:
--> 482 return Pickler.dump(self, obj)
483 except RuntimeError as e:
484 if 'recursion' in e.args[0]:
//anaconda3/envs/eve/lib/python3.7/pickle.py in dump(self, obj)
435 if self.proto >= 4:
436 self.framer.start_framing()
--> 437 self.save(obj)
438 self.write(STOP)
439 self.framer.end_framing()
//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
//anaconda3/envs/eve/lib/python3.7/site-packages/cloudpickle/cloudpickle.py in save_instancemethod(self, obj)
888 else:
889 if PY3: # pragma: no branch
--> 890 self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj)
891 else:
892 self.save_reduce(
//anaconda3/envs/eve/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
636 else:
637 save(func)
--> 638 save(args)
639 write(REDUCE)
640
//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
//anaconda3/envs/eve/lib/python3.7/pickle.py in save_tuple(self, obj)
772 if n <= 3 and self.proto >= 2:
773 for element in obj:
--> 774 save(element)
775 # Subtle. Same as in the big comment below.
776 if id(obj) in memo:
//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
547
548 # Save the reduce() output and finally memoize the object
--> 549 self.save_reduce(obj=obj, *rv)
550
551 def persistent_id(self, obj):
//anaconda3/envs/eve/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
660
661 if state is not None:
--> 662 save(state)
663 write(BUILD)
664
//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
//anaconda3/envs/eve/lib/python3.7/pickle.py in save_dict(self, obj)
857
858 self.memoize(obj)
--> 859 self._batch_setitems(obj.items())
860
861 dispatch[dict] = save_dict
//anaconda3/envs/eve/lib/python3.7/pickle.py in _batch_setitems(self, items)
883 for k, v in tmp:
884 save(k)
--> 885 save(v)
886 write(SETITEMS)
887 elif n:
//anaconda3/envs/eve/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
522 reduce = getattr(obj, "__reduce_ex__", None)
523 if reduce is not None:
--> 524 rv = reduce(self.proto)
525 else:
526 reduce = getattr(obj, "__reduce__", None)
//anaconda3/envs/eve/lib/python3.7/site-packages/tesserocr.cpython-37m-darwin.so in tesserocr.PyTessBaseAPI.__reduce_cython__()
TypeError: no default __reduce__ due to non-trivial __cinit__