if __name__ == '__main__':
    # Pitfall demo: five argument-less tasks are handed to a five-worker pool.
    # The async results are never fetched with get(), so anything that goes
    # wrong inside a task is not reported by this script.
    print('start')
    global_var = "I'm a global variable"
    worker_pool = Pool(5)
    for i in range(5):
        # ``i`` is kept as a module-level name on purpose: multi_task
        # (defined elsewhere in this file) is expected to look it up.
        worker_pool.apply_async(multi_task)
    worker_pool.close()
    worker_pool.join()
    print('end')

# output
"""
start
end
"""
# def multi_task():
#     print global_var
"""
start
I'm a global variable
I'm a global variable
I'm a global variable
I'm a global variable
I'm a global variable
end
"""
执行后会发现输出内容只有start和end:0-4的数字没有打印出来,连不变的全局变量global_var也没有打印,看起来像是没有执行multi_task。实际上multi_task是执行了的,只是它在子进程中抛出了异常,而apply_async不会主动把异常抛回主进程;如果multi_task有返回值,并且在main中对apply_async返回的结果对象调用get方法获取返回值,就会报错NameError: global name 'i' is not defined。原因很可能是Pool(5)在for循环开始之前就已经创建了子进程,此时i尚未被赋值,而global_var已经定义。因此,如果在multi_task里面删掉可变全局变量i,那么全局变量global_var还是能打印出来的。
if __name__ == '__main__':
    # Same demo, but with one explicitly created Process per task.
    print('start')
    global_var = "I'm a global variable"
    started = []
    for i in range(5):
        # Each child is started inside the loop, i.e. while the module-level
        # ``i`` holds the current iteration value.
        child = Process(target=multi_task)
        child.start()
        started.append(child)
    # Wait for every child before announcing the end.
    for child in started:
        child.join()
    print('end')
# output
"""
start
0 | I'm a global variable
1 | I'm a global variable
2 | I'm a global variable
3 | I'm a global variable
4 | I'm a global variable
end
"""
if __name__ == '__main__':
    # Pool variant that passes the loop value to the task explicitly instead
    # of relying on a module-level name being visible in the workers.
    print('start')
    global_var = "I'm a global variable"
    worker_pool = Pool(5)
    for i in range(5):
        worker_pool.apply_async(multi_task, (i,))
    worker_pool.close()
    worker_pool.join()
    print('end')

# output
"""
start
0 | I'm a global variable
1 | I'm a global variable
3 | I'm a global variable
2 | I'm a global variable
4 | I'm a global variable
end
"""
def some_method(self):
    """Submit five calls of ``self.multi_task`` to a five-worker pool.

    Prints ``start``, schedules ``self.multi_task(n)`` for ``n`` in 0..4 with
    ``apply_async``, waits for the pool to drain, then prints ``end``.

    NOTE(review): the async results are never fetched with ``get()``, so a
    failure while dispatching or running a task is not reported here.
    """
    print('start')
    pool = Pool(5)
    for index in range(5):
        pool.apply_async(self.multi_task, (index,))
    # No more submissions; block until every queued task has been handled.
    pool.close()
    pool.join()
    print('end')
def multi_task(self, var):
    """Print ``var`` and the module-level ``global_var`` as ``var | global_var``.

    ``global_var`` is not defined in this method; it is looked up as a global
    at call time.
    """
    # One pre-formatted string gives the same text under a Python 2 print
    # statement and a Python 3 print call, and is emitted in a single write.
    print('%s | %s' % (var, global_var))
def some_method(self):
    """Submit five calls of the module-level ``multi_task`` to a pool.

    Prints ``start``, schedules ``multi_task(n)`` for ``n`` in 0..4 on a
    five-worker pool with ``apply_async``, waits for the pool to drain, then
    prints ``end``. The task is the plain function ``multi_task`` rather than
    a method of ``self``.
    """
    print('start')
    pool = Pool(5)
    for index in range(5):
        pool.apply_async(multi_task, (index,))
    # No more submissions; block until every queued task has been handled.
    pool.close()
    pool.join()
    print('end')
def multi_task(var):
    """Print ``var`` and the module-level ``global_var`` as ``var | global_var``.

    ``global_var`` is not defined in this function; it is looked up as a
    global at call time.
    """
    # One pre-formatted string gives the same text under a Python 2 print
    # statement and a Python 3 print call, and is emitted in a single write.
    print('%s | %s' % (var, global_var))
# Build an instance and run the pooled demo through it.
cls = SomeClass()
cls.some_method()
# output
"""
start
0 | I'm a global variable
1 | I'm a global variable
3 | I'm a global variable
2 | I'm a global variable
4 | I'm a global variable
end
"""
在实践中发现,使用Manager的list时,虽然使用方法和普通list一样,但可能是因为多进程之间需要通信,list中每个元素的大小存在限制。将之前单进程的代码改为多进程后,出现了multiprocessing相关的报错OverflowError: cannot serialize a string larger than 2 GiB。经查看发现,这是在某处给Manager的list添加元素时发生的,并且该元素的确很大,而原来单进程的时候从未出现过该问题,于是产生了上述猜想(因为没有找到相关解释)。我的解决办法是把该超大元素切分后,再分别添加到Manager的list中。
def your_code():
    """Run seven queued task parameters on three worker processes.

    The parameters 0..6 are all placed on a shared ``Queue`` before any
    worker is started. Three processes are then launched, each running
    ``process_pool`` (defined elsewhere in this file) with that queue and a
    ``Manager().list()`` proxy. Once every worker has been joined, the
    result list is printed.
    """
    # Put the parameters of every task on the queue up front.
    all_task = Queue()
    for task_param in range(7):
        all_task.put(task_param)

    # Storage for the results, shared with the workers through a Manager
    # (presumably filled in by process_pool).
    result = Manager().list()

    # Launch the worker processes.
    workers = []
    for _ in range(3):
        worker = Process(target=process_pool, args=(all_task, result))
        worker.start()
        workers.append(worker)

    # Wait for all of them before reporting.
    for worker in workers:
        worker.join()

    print(result)
if __name__ == '__main__':
    # Script entry point: bracket the demo run with start/end markers.
    print('start')
    your_code()
    print('end')
# output
"""
start
Task 0(pid is 14642) will run 3.78 seconds
Task 1(pid is 14643) will run 4.08 seconds
Task 2(pid is 14644) will run 4.62 seconds
Task 0' result is 0
Task 3(pid is 14642) will run 1.58 seconds
Task 1' result is 1
Task 4(pid is 14643) will run 1.20 seconds
Task 2' result is 4
Task 5(pid is 14644) will run 2.74 seconds
Task 4' result is 16
Task 6(pid is 14643) will run 2.07 seconds
Task 3' result is 9
Task 6' result is 36
Task 5' result is 25
[0, 1, 4, 16, 9, 36, 25]
end
"""