51042-notes/06.generators-comprehensions.ipynb
2024-10-27 19:11:35 -05:00

1056 lines
20 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "2663b868-8c7e-4776-9c3b-0cc03d581ee5",
"metadata": {},
"source": [
"## Generators & Comprehensions\n",
"\n",
"### Generator Functions\n",
"\n",
"A generator function works differently from all of the functions we've seen before. It allows the function to return (using the `yield` statement) and resume where it left off, with internal state intact.\n",
"\n",
"Between calls to the generator function, state is suspended."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f6f883d1-88fd-4488-8af7-17c15a47e933",
"metadata": {},
"outputs": [],
"source": [
"def simple_generator_func():\n",
" print(\"start\")\n",
" yield 1\n",
" print(\"still running\")\n",
" yield 2\n",
" print(\"one more\")\n",
" yield 3"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2260b250-1ae6-4d3b-ab05-314ddad19256",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"generator"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# return type of a yielding function is a generator\n",
"type(simple_generator_func())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c051b45d-9a37-4086-b90c-2090110c8cc6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"g = simple_generator_func()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "0d55a761-1496-46dc-bafe-4eb121ad0ca7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"start\n",
"value from generator = 1\n",
"between loops\n",
"still running\n",
"2\n",
"one more\n",
"3\n"
]
}
],
"source": [
"for x in g:\n",
" print(\"value from generator = \", x)\n",
" break\n",
"print(\"between loops\")\n",
"for x in g:\n",
" print(x)"
]
},
{
"cell_type": "markdown",
"id": "427fe881-459e-4a20-acfa-dae65a1b906c",
"metadata": {
"tags": []
},
"source": [
"##"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "6a87d7d2-9eb5-4415-9770-79d9594aafa5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"start\n",
"yielded value= 1\n",
"still running\n",
"yielded value= 2\n",
"one more\n",
"next= 3\n"
]
}
],
"source": [
"g = simple_generator_func()\n",
"for x in g:\n",
" print(\"yielded value=\", x)\n",
" if x == 2:\n",
" break\n",
"\n",
"print(\"next=\", next(g))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "544d5a37-a1c0-47f6-b270-429ea2215386",
"metadata": {},
"outputs": [],
"source": [
"def evens_up_to(n): \n",
" for i in range(2, n + 1):\n",
" if i % 2 == 0:\n",
" yield i"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "7fbc6cf5-0995-4d20-b47d-6b5454899168",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2\n",
"4\n",
"6\n",
"8\n",
"10\n",
"12\n",
"14\n",
"16\n",
"18\n",
"20\n",
"22\n",
"24\n",
"26\n",
"28\n",
"30\n",
"32\n",
"34\n",
"36\n",
"38\n",
"40\n",
"42\n"
]
}
],
"source": [
"#n = 0\n",
"for evens in evens_up_to(2000000000000000000):\n",
" # n += 1\n",
" print(evens)\n",
" if evens > 40:\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "5af8b087-6b63-4948-811d-5ba4e0a8ba55",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"range(0, 100000000000000000000)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(range(100000000000000000000))"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "2fbf706b-6f6c-4a24-890e-10121d709582",
"metadata": {},
"outputs": [],
"source": [
"# Generators do not have to ever exit, here is an infinite generator\n",
"def powers_of_two():\n",
" n = 2\n",
" while True:\n",
" yield n\n",
" n *= 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "32920b91-7b00-4b2a-8927-07bd5cbc6fa2",
"metadata": {},
"outputs": [],
"source": [
"list(powers_of_two())"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "27a8bd75-3912-4f97-9655-763279f07f9b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2\n",
"4\n",
"8\n",
"16\n",
"32\n",
"64\n"
]
}
],
"source": [
"for x in powers_of_two():\n",
" if x > 100:\n",
" break\n",
" print(x)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "5c3b2178-65fc-43d2-8940-d91bf58e1881",
"metadata": {},
"outputs": [],
"source": [
"g = powers_of_two()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "396ce187-a2e3-4611-a188-9403f17cba34",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"16384"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd4f4d70-e74a-43d3-88bc-7dfe04add2a2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"for x in range(100000000000000000000000000000000):\n",
" if x > 5:\n",
" break\n",
" print(x)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5ba1490-5fe1-4120-95a0-f60f85559959",
"metadata": {},
"outputs": [],
"source": [
"y = list(range(100000))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb4db8a4-36da-4d56-b8bd-c10b6e01fc73",
"metadata": {},
"outputs": [],
"source": [
"len(y)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "9be584c1-4cf9-4ad3-88ab-4b5da18e42da",
"metadata": {},
"outputs": [],
"source": [
"g = powers_of_two()\n",
"h = powers_of_two()\n",
"k = h"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "81095f30-547f-4624-b2a4-9cc5997c4a8b",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g is not h"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "c9a9fa0a-e699-4a56-9e9a-cd7e07f1a446",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"128"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(h)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "9b019479-cb3a-41b4-8ef8-a62e61b56430",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"256"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(k)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "07cb4313-a300-47c3-b873-b074399a2fad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4427895344"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"id(g)\n"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "3e2ea606-1041-4171-9096-837fe1eaacdd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4427893440"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"id(h)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "e34470cd-efdd-45d0-a1b5-58e478260e4c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4096"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "2e656696-8e3e-4dfd-86f9-9b87a08debe3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"128"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(h)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39b39657-243c-4641-9859-90fdb7adb3fc",
"metadata": {},
"outputs": [],
"source": [
"r = range(100)\n",
"print(r)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f746d52-4f7f-4a97-aaeb-330e99aaf722",
"metadata": {},
"outputs": [],
"source": [
"# what happens if the parameter changes during iteration?\n",
"def gen_test(param):\n",
" # param = ...bound...\n",
" i = 0\n",
" while i < param:\n",
" yield i\n",
" i += 1\n",
"\n",
"bound = 5\n",
"for x in gen_test(bound):\n",
" print(\"x is \", x, \"bound is\", bound)\n",
" bound.append(4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66d6ae08-da37-4f90-8a77-db08205c3b17",
"metadata": {},
"outputs": [],
"source": [
"def gen_test2():\n",
" yield 1\n",
" yield 2\n",
" yield 3"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88b2aaee-b02e-44f5-8a12-2ba2a539978c",
"metadata": {},
"outputs": [],
"source": [
"g = gen_test2()\n",
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e47ea06-b792-415c-9af9-a850b82b3c95",
"metadata": {},
"outputs": [],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a7803f2-4751-4f41-b1e4-04a212da71aa",
"metadata": {},
"outputs": [],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8081044-cab9-4cd3-9f03-d50fc5ad1362",
"metadata": {},
"outputs": [],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40156c07-49d1-4753-9f26-0fe2481e9a42",
"metadata": {},
"outputs": [],
"source": [
"x = [1, 2, 3]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee0103a6-daf2-457b-af6a-913d7fdb9f92",
"metadata": {},
"outputs": [],
"source": [
"g = iter(x)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6d86fd8-a441-48c2-847e-e56b576102ae",
"metadata": {},
"outputs": [],
"source": [
"def newrange(lower_bound, upper_bound):\n",
" i = lower_bound\n",
" while i < upper_bound:\n",
" yield i\n",
" i += 1"
]
},
{
"cell_type": "markdown",
"id": "308fc5a9-78ac-40cd-afac-970614e9460e",
"metadata": {},
"source": [
"### Discussion: Benefits of Generators"
]
},
{
"cell_type": "markdown",
"id": "4fefa3b8-0727-4586-97df-698b91d91c6f",
"metadata": {},
"source": [
"- Avoids creating entire collections up front.\n",
"\n",
"- Can result in drastic memory savings.\n",
"\n",
" - How much memory does `range(100000)` need?\n",
"\n",
"- Avoids doing expensive computations until necessary."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "229fa10f-331e-4b72-959e-d28b952760ee",
"metadata": {},
"source": [
"### Generator Expressions / Comprehensions\n",
"\n",
"Exact same syntax as a list comprehension, but results in a generator object.\n",
"\n",
"\n",
"```python\n",
"g = (expression for var in iterable)\n",
"\n",
"# or \n",
"\n",
"g = (expression for var in iterable if condition)\n",
"```\n",
"\n",
"Creates a generator that yields `expression` for each iteration of the for loop. (Optionally only if the `condition` is satisfied)\n",
"\n",
"Equivalent to:\n",
"\n",
"```python\n",
"def gf():\n",
" for var in iterable:\n",
" if condition:\n",
" yield expression\n",
"g = gf()\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "aa9b4c18-3d73-4d9c-8e78-3dba56b4a717",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<generator object <genexpr> at 0x107ec63b0>\n"
]
}
],
"source": [
"pow_generator = (i + 1 for i in powers_of_two())\n",
"print(pow_generator)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e69c9d6-11ec-4e01-9116-95f55c8c2c7e",
"metadata": {
"tags": []
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 80,
"id": "729f6476-491a-4daf-b460-23f602ef18fa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3\n",
"5\n",
"9\n",
"17\n",
"33\n",
"65\n",
"129\n",
"257\n",
"513\n",
"1025\n",
"2049\n",
"4097\n",
"8193\n",
"16385\n",
"32769\n",
"65537\n",
"131073\n",
"262145\n",
"524289\n",
"1048577\n"
]
}
],
"source": [
"# can be used just like other generators we've seen (as an iterable)\n",
"for i in pow_generator:\n",
" print(i)\n",
" # will run forever without this\n",
" if i > 1000000:\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "cfed52c0-0467-4b50-ac6c-66d6fa694ec6",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2097153\n",
"4194305\n",
"8388609\n",
"16777217\n"
]
}
],
"source": [
"for x in pow_generator:\n",
" print(x) # can resume the generator\n",
" if x > 10000000:\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "386c0eea-ea29-48bf-ba1b-dc174f340f18",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4294967297"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(pow_generator)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "ac1be105-edc6-4b52-8715-e54c186861d2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 4, 16, 36, 64]"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# also possible to specify conditionals in generator expression, same as comprehension\n",
"gen2 = (i**2 for i in range(10) if i % 2 == 0)\n",
"list(gen2)"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "c721ff55-86ac-46b1-826f-4613905162c8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# equivalent with map & filter\n",
"ll = range(10)\n",
"g = map(lambda x: x**2, filter(lambda x: x % 2 == 0, ll))"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "a5aba588-3823-4763-b1ab-5a6217f72f8e",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "2452f706-096d-4bca-aae7-cc6564e48a23",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "8a250ce6-7e8e-4915-9ea0-1751a96046f0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"16"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": 98,
"id": "8949fbea-455c-483b-83b1-66b9c70fdf30",
"metadata": {},
"outputs": [
{
"ename": "StopIteration",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mStopIteration\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[98], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mg\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mStopIteration\u001b[0m: "
]
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"id": "832bd2a5-cf99-40bf-9dd3-7a2833589485",
"metadata": {},
"outputs": [],
"source": [
"ll = [1, 2, 3, 4]\n",
"g = iter(ll)\n"
]
},
{
"cell_type": "code",
"execution_count": 100,
"id": "01662fa6-dfb9-4609-a04f-9394c7b63a16",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": 101,
"id": "6c353acf-93a7-47c8-8016-90b9b407767d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"id": "e0e9d459-7d96-4187-ba3e-8f8e6a69eb5d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "11f9029d-5029-496c-894e-b416d5b308d5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "8a94d0a3-d09b-4ab7-a465-93c7a6e1511e",
"metadata": {},
"outputs": [
{
"ename": "StopIteration",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mStopIteration\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[104], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mg\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mStopIteration\u001b[0m: "
]
}
],
"source": [
"next(g)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7250129-eab1-4fec-bb46-917e7c11f01e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}