{ "cells": [ { "cell_type": "markdown", "id": "2663b868-8c7e-4776-9c3b-0cc03d581ee5", "metadata": {}, "source": [ "## Generators & Comprehensions\n", "\n", "### Generator Functions\n", "\n", "A generator function works differently from all of the functions we've seen before. It allows the function to return (using the `yield` statement) and resume where it left off, with internal state intact.\n", "\n", "Between calls to `next()` on the generator object, its state is suspended." ] }, { "cell_type": "code", "execution_count": 1, "id": "f6f883d1-88fd-4488-8af7-17c15a47e933", "metadata": {}, "outputs": [], "source": [ "def simple_generator_func():\n", "    print(\"start\")\n", "    yield 1\n", "    print(\"still running\")\n", "    yield 2\n", "    print(\"one more\")\n", "    yield 3" ] }, { "cell_type": "code", "execution_count": 4, "id": "2260b250-1ae6-4d3b-ab05-314ddad19256", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "generator" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# return type of a yielding function is a generator\n", "type(simple_generator_func())" ] }, { "cell_type": "code", "execution_count": 10, "id": "c051b45d-9a37-4086-b90c-2090110c8cc6", "metadata": { "tags": [] }, "outputs": [], "source": [ "g = simple_generator_func()" ] }, { "cell_type": "code", "execution_count": 11, "id": "0d55a761-1496-46dc-bafe-4eb121ad0ca7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "start\n", "value from generator =  1\n", "between loops\n", "still running\n", "2\n", "one more\n", "3\n" ] } ], "source": [ "for x in g:\n", "    print(\"value from generator = \", x)\n", "    break\n", "print(\"between loops\")\n", "for x in g:\n", "    print(x)" ] }, { "cell_type": "markdown", "id": "427fe881-459e-4a20-acfa-dae65a1b906c", "metadata": { "tags": [] }, "source": [ "### Resuming a generator with `next()`" ] }, { "cell_type": "code", "execution_count": 15, "id": "6a87d7d2-9eb5-4415-9770-79d9594aafa5", "metadata": {}, "outputs": [ { "name": "stdout",
"output_type": "stream", "text": [ "start\n", "yielded value= 1\n", "still running\n", "yielded value= 2\n", "one more\n", "next= 3\n" ] } ], "source": [ "g = simple_generator_func()\n", "for x in g:\n", " print(\"yielded value=\", x)\n", " if x == 2:\n", " break\n", "\n", "print(\"next=\", next(g))" ] }, { "cell_type": "code", "execution_count": 16, "id": "544d5a37-a1c0-47f6-b270-429ea2215386", "metadata": {}, "outputs": [], "source": [ "def evens_up_to(n): \n", " for i in range(2, n + 1):\n", " if i % 2 == 0:\n", " yield i" ] }, { "cell_type": "code", "execution_count": 19, "id": "7fbc6cf5-0995-4d20-b47d-6b5454899168", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2\n", "4\n", "6\n", "8\n", "10\n", "12\n", "14\n", "16\n", "18\n", "20\n", "22\n", "24\n", "26\n", "28\n", "30\n", "32\n", "34\n", "36\n", "38\n", "40\n", "42\n" ] } ], "source": [ "#n = 0\n", "for evens in evens_up_to(2000000000000000000):\n", " # n += 1\n", " print(evens)\n", " if evens > 40:\n", " break" ] }, { "cell_type": "code", "execution_count": 24, "id": "5af8b087-6b63-4948-811d-5ba4e0a8ba55", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "range(0, 100000000000000000000)" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(range(100000000000000000000))" ] }, { "cell_type": "code", "execution_count": 25, "id": "2fbf706b-6f6c-4a24-890e-10121d709582", "metadata": {}, "outputs": [], "source": [ "# Generators do not have to ever exit, here is an infinite generator\n", "def powers_of_two():\n", " n = 2\n", " while True:\n", " yield n\n", " n *= 2" ] }, { "cell_type": "code", "execution_count": null, "id": "32920b91-7b00-4b2a-8927-07bd5cbc6fa2", "metadata": {}, "outputs": [], "source": [ "list(powers_of_two())" ] }, { "cell_type": "code", "execution_count": 26, "id": "27a8bd75-3912-4f97-9655-763279f07f9b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2\n", "4\n", 
"8\n", "16\n", "32\n", "64\n" ] } ], "source": [ "for x in powers_of_two():\n", " if x > 100:\n", " break\n", " print(x)" ] }, { "cell_type": "code", "execution_count": 27, "id": "5c3b2178-65fc-43d2-8940-d91bf58e1881", "metadata": {}, "outputs": [], "source": [ "g = powers_of_two()" ] }, { "cell_type": "code", "execution_count": 41, "id": "396ce187-a2e3-4611-a188-9403f17cba34", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "16384" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": null, "id": "dd4f4d70-e74a-43d3-88bc-7dfe04add2a2", "metadata": { "tags": [] }, "outputs": [], "source": [ "for x in range(100000000000000000000000000000000):\n", " if x > 5:\n", " break\n", " print(x)" ] }, { "cell_type": "code", "execution_count": null, "id": "c5ba1490-5fe1-4120-95a0-f60f85559959", "metadata": {}, "outputs": [], "source": [ "y = list(range(100000))" ] }, { "cell_type": "code", "execution_count": null, "id": "fb4db8a4-36da-4d56-b8bd-c10b6e01fc73", "metadata": {}, "outputs": [], "source": [ "len(y)" ] }, { "cell_type": "code", "execution_count": 65, "id": "9be584c1-4cf9-4ad3-88ab-4b5da18e42da", "metadata": {}, "outputs": [], "source": [ "g = powers_of_two()\n", "h = powers_of_two()\n", "k = h" ] }, { "cell_type": "code", "execution_count": 43, "id": "81095f30-547f-4624-b2a4-9cc5997c4a8b", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g is not h" ] }, { "cell_type": "code", "execution_count": 77, "id": "c9a9fa0a-e699-4a56-9e9a-cd7e07f1a446", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "128" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(h)" ] }, { "cell_type": "code", "execution_count": 78, "id": "9b019479-cb3a-41b4-8ef8-a62e61b56430", "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ "256" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(k)" ] }, { "cell_type": "code", "execution_count": 44, "id": "07cb4313-a300-47c3-b873-b074399a2fad", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4427895344" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "id(g)\n" ] }, { "cell_type": "code", "execution_count": 45, "id": "3e2ea606-1041-4171-9096-837fe1eaacdd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4427893440" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "id(h)" ] }, { "cell_type": "code", "execution_count": 62, "id": "e34470cd-efdd-45d0-a1b5-58e478260e4c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4096" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": 64, "id": "2e656696-8e3e-4dfd-86f9-9b87a08debe3", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "128" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(h)" ] }, { "cell_type": "code", "execution_count": null, "id": "39b39657-243c-4641-9859-90fdb7adb3fc", "metadata": {}, "outputs": [], "source": [ "r = range(100)\n", "print(r)" ] }, { "cell_type": "code", "execution_count": null, "id": "1f746d52-4f7f-4a97-aaeb-330e99aaf722", "metadata": {}, "outputs": [], "source": [ "# what happens if the parameter changes during iteration?\n", "def gen_test(param):\n", " # param = ...bound...\n", " i = 0\n", " while i < param:\n", " yield i\n", " i += 1\n", "\n", "bound = 5\n", "for x in gen_test(bound):\n", " print(\"x is \", x, \"bound is\", bound)\n", " bound.append(4)" ] }, { "cell_type": "code", "execution_count": null, "id": "66d6ae08-da37-4f90-8a77-db08205c3b17", "metadata": {}, "outputs": [], "source": [ "def gen_test2():\n", " yield 1\n", " yield 
2\n", " yield 3" ] }, { "cell_type": "code", "execution_count": null, "id": "88b2aaee-b02e-44f5-8a12-2ba2a539978c", "metadata": {}, "outputs": [], "source": [ "g = gen_test2()\n", "next(g)" ] }, { "cell_type": "code", "execution_count": null, "id": "1e47ea06-b792-415c-9af9-a850b82b3c95", "metadata": {}, "outputs": [], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": null, "id": "2a7803f2-4751-4f41-b1e4-04a212da71aa", "metadata": {}, "outputs": [], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": null, "id": "d8081044-cab9-4cd3-9f03-d50fc5ad1362", "metadata": {}, "outputs": [], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": null, "id": "40156c07-49d1-4753-9f26-0fe2481e9a42", "metadata": {}, "outputs": [], "source": [ "x = [1, 2, 3]" ] }, { "cell_type": "code", "execution_count": null, "id": "ee0103a6-daf2-457b-af6a-913d7fdb9f92", "metadata": {}, "outputs": [], "source": [ "g = iter(x)" ] }, { "cell_type": "code", "execution_count": null, "id": "c6d86fd8-a441-48c2-847e-e56b576102ae", "metadata": {}, "outputs": [], "source": [ "def newrange(lower_bound, upper_bound):\n", "    i = lower_bound\n", "    while i < upper_bound:\n", "        yield i\n", "        i += 1" ] }, { "cell_type": "markdown", "id": "308fc5a9-78ac-40cd-afac-970614e9460e", "metadata": {}, "source": [ "### Discussion: Benefits of Generators" ] }, { "cell_type": "markdown", "id": "4fefa3b8-0727-4586-97df-698b91d91c6f", "metadata": {}, "source": [ "- Avoids creating entire collections up front.\n", "\n", "- Can result in drastic memory savings.\n", "\n", "    - How much memory does `range(100000)` need?\n", "\n", "- Avoids doing expensive computations until necessary."
] }, { "attachments": {}, "cell_type": "markdown", "id": "229fa10f-331e-4b72-959e-d28b952760ee", "metadata": {}, "source": [ "### Generator Expressions / Comprehensions\n", "\n", "Same syntax as a list comprehension, but wrapped in parentheses instead of square brackets; the result is a generator object rather than a list.\n", "\n", "\n", "```python\n", "g = (expression for var in iterable)\n", "\n", "# or \n", "\n", "g = (expression for var in iterable if condition)\n", "```\n", "\n", "Creates a generator that yields `expression` for each iteration of the `for` loop (optionally, only when `condition` is satisfied).\n", "\n", "Equivalent to:\n", "\n", "```python\n", "def gf():\n", "    for var in iterable:\n", "        if condition:\n", "            yield expression\n", "g = gf()\n", "```" ] }, { "cell_type": "code", "execution_count": 79, "id": "aa9b4c18-3d73-4d9c-8e78-3dba56b4a717", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<generator object <genexpr> at 0x107ec63b0>\n" ] } ], "source": [ "pow_generator = (i + 1 for i in powers_of_two())\n", "print(pow_generator)" ] }, { "cell_type": "code", "execution_count": null, "id": "2e69c9d6-11ec-4e01-9116-95f55c8c2c7e", "metadata": { "tags": [] }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 80, "id": "729f6476-491a-4daf-b460-23f602ef18fa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3\n", "5\n", "9\n", "17\n", "33\n", "65\n", "129\n", "257\n", "513\n", "1025\n", "2049\n", "4097\n", "8193\n", "16385\n", "32769\n", "65537\n", "131073\n", "262145\n", "524289\n", "1048577\n" ] } ], "source": [ "# can be used just like other generators we've seen (as an iterable)\n", "for i in pow_generator:\n", "    print(i)\n", "    # will run forever without this\n", "    if i > 1000000:\n", "        break" ] }, { "cell_type": "code", "execution_count": 81, "id": "cfed52c0-0467-4b50-ac6c-66d6fa694ec6", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2097153\n", "4194305\n", "8388609\n", "16777217\n" ] } ],
"source": [ "for x in pow_generator:\n", " print(x) # can resume the generator\n", " if x > 10000000:\n", " break" ] }, { "cell_type": "code", "execution_count": 89, "id": "386c0eea-ea29-48bf-ba1b-dc174f340f18", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4294967297" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(pow_generator)" ] }, { "cell_type": "code", "execution_count": 91, "id": "ac1be105-edc6-4b52-8715-e54c186861d2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 4, 16, 36, 64]" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# also possible to specify conditionals in generator expression, same as comprehension\n", "gen2 = (i**2 for i in range(10) if i % 2 == 0)\n", "list(gen2)" ] }, { "cell_type": "code", "execution_count": 92, "id": "c721ff55-86ac-46b1-826f-4613905162c8", "metadata": { "tags": [] }, "outputs": [], "source": [ "# equivalent with map & filter\n", "ll = range(10)\n", "g = map(lambda x: x**2, filter(lambda x: x % 2 == 0, ll))" ] }, { "cell_type": "code", "execution_count": 93, "id": "a5aba588-3823-4763-b1ab-5a6217f72f8e", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": 94, "id": "2452f706-096d-4bca-aae7-cc6564e48a23", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": 95, "id": "8a250ce6-7e8e-4915-9ea0-1751a96046f0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "16" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": 98, "id": "8949fbea-455c-483b-83b1-66b9c70fdf30", "metadata": {}, 
"outputs": [ { "ename": "StopIteration", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mStopIteration\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[98], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mg\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[0;31mStopIteration\u001b[0m: " ] } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": 99, "id": "832bd2a5-cf99-40bf-9dd3-7a2833589485", "metadata": {}, "outputs": [], "source": [ "ll = [1, 2, 3, 4]\n", "g = iter(ll)\n" ] }, { "cell_type": "code", "execution_count": 100, "id": "01662fa6-dfb9-4609-a04f-9394c7b63a16", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": 101, "id": "6c353acf-93a7-47c8-8016-90b9b407767d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": 102, "id": "e0e9d459-7d96-4187-ba3e-8f8e6a69eb5d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": 103, "id": "11f9029d-5029-496c-894e-b416d5b308d5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": 104, "id": "8a94d0a3-d09b-4ab7-a465-93c7a6e1511e", "metadata": {}, "outputs": [ { "ename": "StopIteration", "evalue": "", "output_type": "error", "traceback": [ 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mStopIteration\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[104], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mg\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[0;31mStopIteration\u001b[0m: " ] } ], "source": [ "next(g)" ] }, { "cell_type": "code", "execution_count": null, "id": "c7250129-eab1-4fec-bb46-917e7c11f01e", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.15" } }, "nbformat": 4, "nbformat_minor": 5 }