迭代器
python迭代器,可以直接用于for循环的对象称为可迭代对象,可以直接被next()
函数调用并不断返回下一个值的对象称为迭代器,所以可迭代对象可以通过iter()
来转变为迭代器。我们前面说的生成器也是迭代器的一种。
简单示例:
1 | lst=[1,2,3,4] |
2 | next(lst) |
3 | Traceback (most recent call last): |
4 | File "<stdin>", line 1, in <module> |
5 | TypeError: 'list' object is not an iterator |
6 | iter(lst) |
7 | <list_iterator object at 0x10c3d1e48> |
8 | glst=iter(lst) |
9 | next(glst) |
10 | 1 |
11 | next(glst) |
12 | 2 |
自己实现一个迭代器:
class Iterable:
    """Iterable object: every ``iter()`` call hands out a fresh ``Iterator``."""

    def __iter__(self):
        """Return a new, independent ``Iterator`` starting from the beginning."""
        return Iterator()


class Iterator:
    """Endless iterator producing the even numbers 0, 2, 4, ...

    It never raises ``StopIteration``, so a plain ``for`` loop over it does
    not terminate on its own.
    """

    def __init__(self):
        # Start one step below the first value because __next__ increments
        # before returning.
        self.start = -2

    def __iter__(self):
        """Return ``self`` so the iterator can itself be used in ``for``/``iter()``."""
        return self

    def __next__(self):
        """Advance by 2 and return the new value."""
        self.start += 2
        return self.start
11 | |
12 | itera=Iterable() |
13 | for i in itera: |
14 | print(i) |
itertools
itertools是python标准库,目的是实现高效循环的迭代器。
count()
1 | # count(start=0, step=1) 从start开始,按step步进,持续迭代 |
2 | import itertools |
3 | itertools.count(100) |
4 | count(100) |
5 | gen=itertools.count(100) |
6 | next(gen) |
7 | 100 |
8 | next(gen) |
9 | 101 |
10 | gen=itertools.count() |
11 | next(gen) |
12 | 0 |
13 | next(gen) |
14 | 1 |
15 | |
16 | ## 生成器实现 |
def count(start=0, step=1):
    """Generator equivalent of ``itertools.count``.

    Yields ``start``, ``start + step``, ``start + 2 * step``, ... forever.

    Args:
        start: first value to yield.
        step: amount added after each yielded value.
    """
    value = start
    while True:
        yield value
        value += step
cycle()
1 | # cycle(iterable) 重复循环迭代iterable,iterable为可迭代对象 |
2 | import itertools |
3 | gen=itertools.cycle([1,2]) |
4 | next(gen) |
5 | 1 |
6 | next(gen) |
7 | 2 |
8 | next(gen) |
9 | 1 |
10 | next(gen) |
11 | 2 |
12 | |
13 | ## 生成器实现 |
def cycle(iterable):
    """Generator equivalent of ``itertools.cycle``.

    Yields every element of ``iterable`` and then repeats the saved elements
    indefinitely. If ``iterable`` is empty the generator simply finishes.

    Args:
        iterable: any iterable, including one-shot iterators.
    """
    # Remember the elements during the first pass: a one-shot iterator
    # cannot be iterated a second time.
    saved = []
    for element in iterable:
        yield element
        saved.append(element)
    # An empty input leaves ``saved`` empty, so the loop is skipped instead
    # of spinning forever without yielding.
    while saved:
        for element in saved:
            yield element
repeat()
1 | # repeat(elem[,times]) 如果未指定times,将一直迭代elem |
2 | import itertools |
3 | gen=itertools.repeat('n') |
4 | next(gen) |
5 | 'n' |
6 | next(gen) |
7 | 'n' |
8 | next(gen) |
9 | 'n' |
10 | gen=itertools.repeat('n',1) |
11 | next(gen) |
12 | 'n' |
13 | next(gen) |
14 | Traceback (most recent call last): |
15 | File "<stdin>", line 1, in <module> |
16 | StopIteration |
17 | |
18 | ## 生成器实现 |
def repeat(elem, times=None):
    """Generator equivalent of ``itertools.repeat``.

    Args:
        elem: the object to yield.
        times: how many times to yield ``elem``; ``None`` means forever.
    """
    if times is not None:
        # Bounded case: yield exactly ``times`` copies, then finish.
        for _unused in range(times):
            yield elem
        return
    while True:
        yield elem
accumulate()
1 | # accumulate(iterable[,func]) 迭代输出累计的结果(默认为累加和) |
2 | import itertools |
3 | gen=itertools.accumulate('ab') |
4 | next(gen) |
5 | 'a' |
6 | next(gen) |
7 | 'ab' |
8 | next(gen) |
9 | Traceback (most recent call last): |
10 | File "<stdin>", line 1, in <module> |
11 | StopIteration |
12 | gen=itertools.accumulate('ab',lambda x,y:x+y) |
13 | next(gen) |
14 | 'a' |
15 | next(gen) |
16 | 'ab' |
17 | next(gen) |
18 | Traceback (most recent call last): |
19 | File "<stdin>", line 1, in <module> |
20 | StopIteration |
21 | |
22 | ## 生成器实现 |
def func(total, element):
    """Default combining function for ``accumulate``: return ``total + element``."""
    return total + element


def accumulate(iterable, func=func):
    """Generator equivalent of ``itertools.accumulate``.

    Yields the running result of applying ``func`` to the elements of
    ``iterable``: the first element, then ``func(first, second)``, and so on.
    An empty ``iterable`` yields nothing.

    Args:
        iterable: source of elements.
        func: two-argument callable ``func(total, element)``; defaults to
            addition.
    """
    it = iter(iterable)
    try:
        total = next(it)
    except StopIteration:
        # A StopIteration escaping a generator body is turned into
        # RuntimeError (PEP 479), so end the generator explicitly.
        return
    yield total
    for element in it:
        total = func(total, element)
        yield total
chain()
1 | # chain(iterable,iterable...) 将多个迭代对象合并,以迭代器输出每个迭代对象的要素 |
2 | gen=itertools.chain('a',[12,]) |
3 | next(gen) |
4 | 'a' |
5 | next(gen) |
6 | 12 |
7 | next(gen) |
8 | Traceback (most recent call last): |
9 | File "<stdin>", line 1, in <module> |
10 | StopIteration |
11 | |
12 | ## 生成器实现 |
def chain(*args):
    """Generator equivalent of ``itertools.chain``.

    Yields every element of the first iterable in ``args``, then every
    element of the second, and so on.
    """
    for iterable in args:
        yield from iterable
chain.from_iterable()
1 | # chain.from_iterable(iterable) 将一个可迭代对象内的要素,以chain的方法重新迭代输出 |
2 | gen=itertools.chain.from_iterable(['ab','ef']) |
3 | next(gen) |
4 | 'a' |
5 | next(gen) |
6 | 'b' |
7 | next(gen) |
8 | 'e' |
9 | next(gen) |
10 | 'f' |
11 | |
12 | ## 生成器实现 |
def from_iterable(iterable):
    """Generator equivalent of ``itertools.chain.from_iterable``.

    ``iterable`` is an iterable of iterables; the elements of each inner
    iterable are yielded in order.
    """
    for inner in iterable:
        yield from inner
compress()
1 | # compress(iterable,selectors) 根据selectors返回相应的iterable对应索引的值 |
2 | gen=itertools.compress('abcd',[1,0,1,0]) |
3 | next(gen) |
4 | 'a' |
5 | next(gen) |
6 | 'c' |
7 | next(gen) |
8 | Traceback (most recent call last): |
9 | File "<stdin>", line 1, in <module> |
10 | StopIteration |
11 |
|
12 | |
13 | ## 生成器实现 |
def compress(iterable, selectors):
    """Generator equivalent of ``itertools.compress``.

    Returns a generator over the elements of ``iterable`` whose corresponding
    element in ``selectors`` is truthy. Iteration stops when either input is
    exhausted.

    Args:
        iterable: source of data elements (any iterable, not only sequences).
        selectors: iterable of truthy/falsy flags, paired positionally with
            the data.
    """
    # zip() pairs the inputs lazily, so the data need not support indexing
    # and a longer ``selectors`` cannot cause an IndexError.
    return (item for item, keep in zip(iterable, selectors) if keep)
dropwhile()
1 | # dropwhile(fun,seq) 迭代seq在fun中返回为假后的全部要素 |
2 | gen=itertools.dropwhile(lambda x: x<5,[1,2,3,4,5,6,1,2]) |
3 | next(gen) |
4 | 5 |
5 | next(gen) |
6 | 6 |
7 | next(gen) |
8 | 1 |
9 | next(gen) |
10 | 2 |
11 | next(gen) |
12 | Traceback (most recent call last): |
13 | File "<stdin>", line 1, in <module> |
14 | StopIteration |
15 | |
def dropwhile(predicate, iterable):
    """Generator equivalent of ``itertools.dropwhile``.

    Skips leading elements while ``predicate(element)`` is true, then yields
    the first failing element and everything after it without further checks.

    Example: dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1
    """
    remaining = iter(iterable)
    for element in remaining:
        if predicate(element):
            continue
        # First element that fails the predicate: emit it and stop testing.
        yield element
        break
    # Everything left in the same iterator is passed through unchanged.
    yield from remaining
filterfalse()
1 | # filterfalse(predicate, iterable) 当predicate为假,迭代假的要素 |
2 | gen=itertools.filterfalse(lambda x: x<8,[1,2,3,4,5,6,7,8,9,1,2]) |
3 | next(gen) |
4 | 8 |
5 | next(gen) |
6 | 9 |
7 | next(gen) |
8 | Traceback (most recent call last): |
9 | File "<stdin>", line 1, in <module> |
10 | StopIteration |
groupby
1 | # groupby(iterable[, key]) 按键分组的子迭代器 |
2 | |
3 | for key,items in groupby(data,key=lambda u:u): |
4 | print(key) |
5 | for item in items: |
6 | print(item) |
7 | |
8 | groupby只检查相邻的项,因此通常需要先按同一个key对数据排序 |
9 | |
10 | 实战: 按客户ip分组nginx的日志 |
11 | |
from itertools import groupby


def _client_ip(line):
    """Return the first whitespace-separated field of a log line."""
    return line.split()[0]


# Count the lines of access.log per first field (taken to be the client IP).
with open('access.log') as f:
    # Skip blank lines: they have no first field and would raise IndexError.
    data = [line for line in f if line.strip()]

# groupby only merges adjacent items, so sort with the same key it groups by.
data.sort(key=_client_ip)
for key, items in groupby(data, key=_client_ip):
    print(key)
    # The group is a one-shot iterator; count its lines without storing them.
    print(sum(1 for _ in items))
islice()
1 | # islice(seq,[start,]stop[,step]) 切割并迭代 |
2 | gen=itertools.islice('abcdf',0,None,2) |
3 | next(gen) |
4 | 'a' |
5 | next(gen) |
6 | 'c' |
7 | next(gen) |
8 | 'f' |
9 | next(gen) |
10 | Traceback (most recent call last): |
11 | File "<stdin>", line 1, in <module> |
12 | StopIteration |
starmap()
1 | # starmap 类似map的作用,但是此方法可以传递多个参数 |
2 | def fun(x,y): |
3 | return x+y |
4 |
|
5 | gen=itertools.starmap(fun,[(1,2)]) |
6 | list(gen) |
7 | [3] |
8 | list(map(fun,[(1,2)])) |
9 | Traceback (most recent call last): |
10 | File "<stdin>", line 1, in <module> |
11 | TypeError: fun() missing 1 required positional argument: 'y' |
takewhile()
1 | # takewhile(pred, seq) 迭代直到pred返回false 与dropwhile相反 |
2 | gen=itertools.takewhile(lambda x:x<3,[1,2,3,4,5,1,2]) |
3 | next(gen) |
4 | 1 |
5 | next(gen) |
6 | 2 |
7 | next(gen) |
8 | Traceback (most recent call last): |
9 | File "<stdin>", line 1, in <module> |
10 | StopIteration |
tee()
1 | # tee(it,n) 将一份迭代器分割成多份 |
2 | gen=itertools.takewhile(lambda x:x<5,[1,2,3,4,5,4,3,2,1]) |
3 | a,b=itertools.tee(gen,2) |
4 | list(a) |
5 | [1, 2, 3, 4] |
6 | list(b) |
7 | [1, 2, 3, 4] |
8 | list(gen) |
9 | [] |
zip_longest()
1 | # zip_longest(p,q) zip的扩展函数 |
2 | zip(['x','y'],[1,2,3,4]) |
3 | <zip object at 0x104925088> |
4 | list(zip(['x','y'],[1,2,3,4])) |
5 | [('x', 1), ('y', 2)] |
6 | zip(['x','y'],[1,2,3,4]) |
7 | KeyboardInterrupt |
8 | list(itertools.zip_longest(['x','y'],[1,2,3,4],fillvalue='*')) |
9 | [('x', 1), ('y', 2), ('*', 3), ('*', 4)] |
组合迭代器
product()
1 | # product(p,q,...[repeat=1]) 笛卡尔积 |
2 | list(itertools.product(['a','b','c'],[1,2,3])) |
3 | [('a', 1), ('a', 2), ('a', 3), ('b', 1), ('b', 2), ('b', 3), ('c', 1), ('c', 2), ('c', 3)] |
4 | list(itertools.product(['a','b','c'],repeat=2)) |
5 | [('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'b'), ('b', 'c'), ('c', 'a'), ('c', 'b'), ('c', 'c')] |
permutations()
1 | # permutations(p[,r]) 返回r长度的元组,全部可能的排序,没有重复的元素 |
2 | list(itertools.permutations('abc')) |
3 | [('a', 'b', 'c'), ('a', 'c', 'b'), ('b', 'a', 'c'), ('b', 'c', 'a'), ('c', 'a', 'b'), ('c', 'b', 'a')] |
4 | list(itertools.permutations('abc',2)) |
5 | [('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'c'), ('c', 'a'), ('c', 'b')] |
combinations()
1 | # combinations(p,r) 返回r长度的元组,按输入顺序排列(输入有序时结果按字典序),元素不重复选取 |
2 | list(itertools.combinations('abc',3)) |
3 | [('a', 'b', 'c')] |
4 | list(itertools.combinations('abc',2)) |
5 | [('a', 'b'), ('a', 'c'), ('b', 'c')] |
combinations_with_replacement()
1 | # combinations_with_replacement(p,r) 返回r长度的元组,按输入顺序排列(输入有序时结果按字典序),元素可以重复选取 |
2 | list(itertools.combinations_with_replacement('abc',3)) |
3 | [('a', 'a', 'a'), ('a', 'a', 'b'), ('a', 'a', 'c'), ('a', 'b', 'b'), ('a', 'b', 'c'), ('a', 'c', 'c'), ('b', 'b', 'b'), ('b', 'b', 'c'), ('b', 'c', 'c'), ('c', 'c', 'c')] |
4 | list(itertools.combinations_with_replacement('abc',2)) |
5 | [('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')] |