Jusene's Blog

python3标准库学习之itertools

字数统计: 2k阅读时长: 11 min
2017/10/19 Share

迭代器

Python迭代器:可以直接用于for循环的对象称为可迭代对象(Iterable);可以被next()函数调用并不断返回下一个值的对象称为迭代器(Iterator)。可迭代对象本身不一定是迭代器,但可以通过iter()转换为迭代器。我们前面说的生成器也是迭代器的一种。

简单示例:

1
>>> lst=[1,2,3,4]
2
>>> next(lst)
3
Traceback (most recent call last):
4
  File "<stdin>", line 1, in <module>
5
TypeError: 'list' object is not an iterator
6
>>> iter(lst)
7
<list_iterator object at 0x10c3d1e48>
8
>>> glst=iter(lst)
9
>>> next(glst)
10
1
11
>>> next(glst)
12
2

自己实现一个迭代器:

1
class Iterable:
2
	def __iter__(self):
3
		return Iterator()
4
5
class Iterator:
6
	def __init__(self):
7
		self.start=-2
8
	def __next__(self):
9
		self.start+=2
10
		return self.start
11
12
itera=Iterable()
13
for i in itera:
14
	print(i)

itertools

itertools是python标准库中的一个模块,提供了一组用于高效循环的迭代器构建函数。

count()

1
# count(start=0, step=1) 返回一个从start开始、每次增加step的迭代器,无限迭代
2
>>> import itertools
3
>>> itertools.count(100)
4
count(100)
5
>>> gen=itertools.count(100)
6
>>> next(gen)
7
100
8
>>> next(gen)
9
101
10
>>> gen=itertools.count()
11
>>> next(gen)
12
0
13
>>> next(gen)
14
1
15
16
## 生成器实现
17
>>> def count(start=0,step=1):
18
...     x=start
19
...     while True:
20
...             yield x
21
...             x+=step

cycle()

1
# cycle(iterable)  重复循环迭代iterable,iterable为可迭代对象
2
>>> import itertools
3
>>> gen=itertools.cycle([1,2])
4
>>> next(gen)
5
1
6
>>> next(gen)
7
2
8
>>> next(gen)
9
1
10
>>> next(gen)
11
2
12
13
## 生成器实现
14
>>> def cycle(iterable):
15
...		ret=iterable
16
...		while True:
17
...			for i in ret:
18
...				yield i
19
...		else:
20
...			ret=iterable

repeat()

1
# repeat(elem[,times]) 如果未指定times,将一直迭代elem
2
>>> import itertools
3
>>> gen=itertools.repeat('n')
4
>>> next(gen)
5
'n'
6
>>> next(gen)
7
'n'
8
>>> next(gen)
9
'n'
10
>>> gen=itertools.repeat('n',1)
11
>>> next(gen)
12
'n'
13
>>> next(gen)
14
Traceback (most recent call last):
15
  File "<stdin>", line 1, in <module>
16
StopIteration
17
18
## 生成器实现
19
>>> def repeat(elem,times=None):
20
...		if times is None:
21
...			while 1:
22
...				yield elem
23
...		else:
24
...			for _ in range(times):
25
...				yield elem

accumulate()

1
# accumulate(iterable[,func]) 迭代输出累积结果,默认func为加法,即输出累加和
2
>>> import itertools
3
>>> gen=itertools.accumulate('ab')
4
>>> next(gen)
5
'a'
6
>>> next(gen)
7
'ab'
8
>>> next(gen)
9
Traceback (most recent call last):
10
  File "<stdin>", line 1, in <module>
11
StopIteration
12
>>> gen=itertools.accumulate('ab',lambda x,y:x+y)
13
>>> next(gen)
14
'a'
15
>>> next(gen)
16
'ab'
17
>>> next(gen)
18
Traceback (most recent call last):
19
  File "<stdin>", line 1, in <module>
20
StopIteration
21
22
## 生成器实现
23
def func(total,element):
24
	return total+element
25
def accumulate(iterable,func=func):
26
	it=iter(iterable)
27
	total=next(it)
28
	yield total
29
	for i in it:
30
		total=func(total,i)
31
		yield total

chain()

1
# chain(iterable,iterable...)  将多个迭代对象合并,以迭代器输出每个迭代对象的要素
2
>>> gen=itertools.chain('a',[12,])
3
>>> next(gen)
4
'a'
5
>>> next(gen)
6
12
7
>>> next(gen)
8
Traceback (most recent call last):
9
  File "<stdin>", line 1, in <module>
10
StopIteration
11
12
## 生成器实现
13
def chain(*args):
14
	for it in args:
15
		for i in it:
16
			yield i

chain.from_iterable()

1
# chain.from_iterable(iterable) 将一个可迭代对象内的要素,以chain的方法重新迭代输出
2
>>> gen=itertools.chain.from_iterable(['ab','ef'])
3
>>> next(gen)
4
'a'
5
>>> next(gen)
6
'b'
7
>>> next(gen)
8
'e'
9
>>> next(gen)
10
'f'
11
12
## 生成器实现
13
def from_iterable(iterable):
14
	for it in iterable:
15
		for i in it:
16
			yield i

compress()

1
# compress(iterable,selectors) 根据selectors返回相应的iterable对应索引的值
2
>>> gen=itertools.compress('abcd',[1,0,1,0])
3
>>> next(gen)
4
'a'
5
>>> next(gen)
6
'c'
7
>>> next(gen)
8
Traceback (most recent call last):
9
  File "<stdin>", line 1, in <module>
10
StopIteration
11
>>> 
12
13
## 生成器实现
14
def compress(iterable,selectors):
15
	s=selectors
16
	d=iterable
17
	return (d[i] for i,v in enumerate(s) if v)

dropwhile()

1
# dropwhile(fun,seq) 丢弃seq开头使fun返回真的要素,从第一个使fun返回假的要素开始,迭代其后的全部要素
2
>>> gen=itertools.dropwhile(lambda x: x<5,[1,2,3,4,5,6,1,2])
3
>>> next(gen)
4
5
5
>>> next(gen)
6
6
7
>>> next(gen)
8
1
9
>>> next(gen)
10
2
11
>>> next(gen)
12
Traceback (most recent call last):
13
  File "<stdin>", line 1, in <module>
14
StopIteration
15
16
def dropwhile(predicate, iterable):
17
    # dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1
18
    iterable = iter(iterable)
19
    for x in iterable:
20
        if not predicate(x):
21
            yield x
22
            break
23
    for x in iterable:
24
        yield x

filterfalse()

1
# filterfalse(predicate, iterable) 只迭代使predicate返回假的要素
2
>>> gen=itertools.filterfalse(lambda x: x<8,[1,2,3,4,5,6,7,8,9,1,2])
3
>>> next(gen)
4
8
5
>>> next(gen)
6
9
7
>>> next(gen)
8
Traceback (most recent call last):
9
  File "<stdin>", line 1, in <module>
10
StopIteration

groupby

1
# groupby(iterable[, key]) 按键分组的子迭代器
2
3
for key,items in groupby(data,key=lambda u:u):
4
	print(key)
5
	for item in items:
6
		print(item)
7
8
groupby只检查相邻的项,key相同但不相邻的项会被分到不同的组,因此通常需要先排序
9
10
实战: 按客户ip分组nginx的日志
11
12
from itertools import groupby
13
with open('access.log') as f:
14
        data=f.readlines()
15
16
data.sort()
17
for key,items in groupby(data,lambda x:x.split()[0]):
18
        print(key)
19
        count=0
20
        for item in items:
21
        		count+=1
22
                #print(item)	
23
        print(count)

islice()

1
# islice(seq,[start,]stop[,step]) 切割并迭代
2
>>> gen=itertools.islice('abcdf',0,None,2)
3
>>> next(gen)
4
'a'
5
>>> next(gen)
6
'c'
7
>>> next(gen)
8
'f'
9
>>> next(gen)
10
Traceback (most recent call last):
11
  File "<stdin>", line 1, in <module>
12
StopIteration

starmap()

1
# starmap(fun,seq) 类似map的作用,但会把seq中的每个元组解包后作为多个参数传给fun,即fun(*args)
2
>>> def fun(x,y):
3
...     return x+y
4
... 
5
>>> gen=itertools.starmap(fun,[(1,2)])
6
>>> list(gen)
7
[3]
8
>>> list(map(fun,[(1,2)]))
9
Traceback (most recent call last):
10
  File "<stdin>", line 1, in <module>
11
TypeError: fun() missing 1 required positional argument: 'y'

takewhile()

1
# takewhile(pred, seq)  迭代直到pred返回false  与dropwhile相反
2
>>> gen=itertools.takewhile(lambda x:x<3,[1,2,3,4,5,1,2])  
3
>>> next(gen)
4
1
5
>>> next(gen)
6
2
7
>>> next(gen)
8
Traceback (most recent call last):
9
  File "<stdin>", line 1, in <module>
10
StopIteration

tee()

1
# tee(it,n=2) 将一个迭代器复制为n个相互独立的迭代器,之后不应再直接使用原迭代器
2
>>> gen=itertools.takewhile(lambda x:x<5,[1,2,3,4,5,4,3,2,1])
3
>>> a,b=itertools.tee(gen,2)
4
>>> list(a)
5
[1, 2, 3, 4]
6
>>> list(b)
7
[1, 2, 3, 4]
8
>>> list(gen)
9
[]

zip_longest()

1
# zip_longest(p,q,...[,fillvalue=None]) zip的扩展函数,以最长的可迭代对象为准,缺失的位置用fillvalue填充
2
>>> zip(['x','y'],[1,2,3,4])
3
<zip object at 0x104925088>
4
>>> list(zip(['x','y'],[1,2,3,4]))
5
[('x', 1), ('y', 2)]
6
>>> zip(['x','y'],[1,2,3,4])
7
KeyboardInterrupt
8
>>> list(itertools.zip_longest(['x','y'],[1,2,3,4],fillvalue='*'))
9
[('x', 1), ('y', 2), ('*', 3), ('*', 4)]

组合迭代器

product()

1
# product(p,q,...[repeat=1]) 笛卡尔积
2
>>> list(itertools.product(['a','b','c'],[1,2,3]))
3
[('a', 1), ('a', 2), ('a', 3), ('b', 1), ('b', 2), ('b', 3), ('c', 1), ('c', 2), ('c', 3)]
4
>>> list(itertools.product(['a','b','c'],repeat=2))
5
[('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'b'), ('b', 'c'), ('c', 'a'), ('c', 'b'), ('c', 'c')]

permutations()

1
# permutations(p[,r]) 返回长度为r的元组,包含全部可能的排列,同一位置的元素不重复使用
2
>>> list(itertools.permutations('abc'))
3
[('a', 'b', 'c'), ('a', 'c', 'b'), ('b', 'a', 'c'), ('b', 'c', 'a'), ('c', 'a', 'b'), ('c', 'b', 'a')]
4
>>> list(itertools.permutations('abc',2))
5
[('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'c'), ('c', 'a'), ('c', 'b')]

combinations()

1
# combinations(p,r) 返回长度为r的元组,按输入顺序生成全部组合,同一位置的元素不重复使用
2
>>> list(itertools.combinations('abc',3))
3
[('a', 'b', 'c')]
4
>>> list(itertools.combinations('abc',2))
5
[('a', 'b'), ('a', 'c'), ('b', 'c')]

combinations_with_replacement()

1
# combinations_with_replacement(p,r)  返回长度为r的元组,按输入顺序生成全部组合,允许同一元素重复使用
2
>>> list(itertools.combinations_with_replacement('abc',3))
3
[('a', 'a', 'a'), ('a', 'a', 'b'), ('a', 'a', 'c'), ('a', 'b', 'b'), ('a', 'b', 'c'), ('a', 'c', 'c'), ('b', 'b', 'b'), ('b', 'b', 'c'), ('b', 'c', 'c'), ('c', 'c', 'c')]
4
>>> list(itertools.combinations_with_replacement('abc',2))
5
[('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')]
CATALOG
  1. 1. 迭代器
  2. 2. itertools
    1. 2.1. count()
    2. 2.2. cycle()
    3. 2.3. repeat()
    4. 2.4. accumulate()
    5. 2.5. chain()
    6. 2.6. chain.from_iterable()
    7. 2.7. compress()
    8. 2.8. dropwhile()
    9. 2.9. filterfalse()
    10. 2.10. groupby
    11. 2.11. islince()
    12. 2.12. starmap()
    13. 2.13. takewhile()
    14. 2.14. tee()
    15. 2.15. zip_longest()
  3. 3. 组合迭代器
    1. 3.1. product()
    2. 3.2. permutations()
    3. 3.3. combinations()
    4. 3.4. combinations_with_replacement()