From 45af0466413ad56a442fd515018f42951b416fdf Mon Sep 17 00:00:00 2001 From: Matthijs Blom <19817960+MatthijsBlom@users.noreply.github.com> Date: Fri, 24 Mar 2023 22:54:17 +0100 Subject: [PATCH 1/4] Initial sketch --- .../.approaches/introduction.md | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 exercises/practice/sum-of-multiples/.approaches/introduction.md diff --git a/exercises/practice/sum-of-multiples/.approaches/introduction.md b/exercises/practice/sum-of-multiples/.approaches/introduction.md new file mode 100644 index 0000000000..ee5333acf3 --- /dev/null +++ b/exercises/practice/sum-of-multiples/.approaches/introduction.md @@ -0,0 +1,109 @@ +# Introduction + + + +Several possible approaches to this exercise: + +- filter for multiples +- generate multiples and + - gather & de-duplicate, e.g. using `set().union` + - merge the multiple-generators into one +- spot the repeating pattern + + +## Approach: `filter` for multiples + +```python +def sum_of_multiples(limit, factors): + is_multiple = lambda n: any(n % f == 0 for f in factors if f != 0) + return sum(filter(is_multiple, range(limit))) +``` + +Egregious performance when multiples are few. + +... + + + +## Approach: generate & gather multiples + +```python +def sum_of_multiples(limit, factors): + multiples = (range(0, limit, f) for f in factors if f != 0) + return sum(set().union(*multiples)) +``` + +Egregious memory occupancy when multiples are many. + +... 
+ + + +## Approach: merge the multiple-generators into one + +```python +# NOTE This is a sketch (but it does work) +def sum_of_multiples(limit, factors): + generators = [range(0, limit, f) for f in factors if f != 0] + while len(generators) > 1: + generators = [ + merge(g, g_) + for g, g_ in zip_longest(generators[0::2], generators[1::2], fillvalue=()) + ] + all_multiples, *_ = generators + [()] + return sum(all_multiples) + + +def merge(gen1, gen2): + """Merge two sorted-without-duplicates iterables + into a single sorted-without-duplicates generator. + """ + return sorted({*gen1, *gen2}) # FIXME this is CHEATING +``` + +This is supposed to use very little memory. + +... + + + +## Approach: spot the repeating pattern + +```python +# NOTE this too is but a sketch (that nevertheless works) +def sum_of_multiples(limit, factors): + (*factors,) = filter(lambda f: f != 0, factors) + N = lcm(*factors) + is_multiple = lambda n: any(n % f == 0 for f in factors) + multiples_up_to_lcm = [n for n in range(1, N + 1) if is_multiple(n)] + q, r = divmod(limit - 1, N) + return ( + q * (q - 1) // 2 * N * len(multiples_up_to_lcm) + + q * sum(multiples_up_to_lcm) + + sum(q * N + m for m in takewhile(lambda m: m <= r, multiples_up_to_lcm)) + ) +``` + +```text +assuming: limit = 22 multiples = [2, 3] +the task is to sum the lower 4 below rows + +| 1 2 3 4 5 6| 7 8 9 10 11 12|13 14 15 16 17 18|19 20 21 +| 2 3 4 6| 6 6 6 6| 6 6 6 6| 6 6 +| | 2 3 4 6| 6 6 6 6| 6 6 +| | | 2 3 4 6| 6 6 +| | | | 2 3 + +We see + 3 copies of - 2 3 4 - 6 + 0+1+2 = 3×(3-1)/2 = 3 copies of - 6 6 6 - 6 + 3 copies of - 6 6 + 1 copy of - 2 3 +``` + + + +This approach saves on a lot of iteration, but is still vulnerable to excessive memory use. +Fortunately it can be combined with the generator merging approach. + +... 
From a4bb5cde296981265a543c0549280f90e3c11b8b Mon Sep 17 00:00:00 2001 From: Matthijs Blom <19817960+MatthijsBlom@users.noreply.github.com> Date: Thu, 30 Mar 2023 18:43:03 +0200 Subject: [PATCH 2/4] Start on approach: filter for multiples --- .../sum-of-multiples/.approaches/config.json | 18 ++ .../filter-for-multiples/content.md | 156 ++++++++++++++++++ .../filter-for-multiples/snippet.txt | 3 + .../.approaches/introduction.md | 15 +- 4 files changed, 190 insertions(+), 2 deletions(-) create mode 100644 exercises/practice/sum-of-multiples/.approaches/config.json create mode 100644 exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md create mode 100644 exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/snippet.txt diff --git a/exercises/practice/sum-of-multiples/.approaches/config.json b/exercises/practice/sum-of-multiples/.approaches/config.json new file mode 100644 index 0000000000..12e6964fbe --- /dev/null +++ b/exercises/practice/sum-of-multiples/.approaches/config.json @@ -0,0 +1,18 @@ +{ + "introduction": { + "authors": [ + "MatthijsBlom" + ] + }, + "approaches": [ + { + "uuid": "7dd85d5b-12bd-48a6-97fe-8eb7dd87af72", + "slug": "filter-for-multiples", + "title": "Filter for multiples", + "blurb": "Use the built-in filter function to select the numbers that are multiples, then sum these.", + "authors": [ + "MatthijsBlom" + ] + } + ] +} diff --git a/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md new file mode 100644 index 0000000000..2dd452f2a5 --- /dev/null +++ b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md @@ -0,0 +1,156 @@ +# `filter` for multiples + +```python +def sum_of_multiples(limit, factors): + is_multiple = lambda n: any(n % f == 0 for f in factors if f != 0) + return sum(filter(is_multiple, range(limit))) +``` + +Probably the most straightforward way of 
solving this problem is to + +1. look at every individual integer between `0` and `limit`, +2. check that it is a multiple of any of the given `factors`, and +3. add it to the sum when it is. + + +## Notable language features used in this solution + +### Built-in function: `sum` + +Adding all the numbers in a collection together is a very common operation. +Therefore, Python provides the built-in function [`sum`][builtin-sum]. + +`sum` takes one argument, and requires that it be **iterable**. +A value is iterable whenever it makes sense to use it in a `for` loop like this: + +```python +for _ in iterable_value: # 👈 + ... +``` + +The `list` is the most commonly used iterable data structure. +Many other containers are also iterable, such as `set`s, `tuple`s, `range`s, and even `dict`s and `str`ings. +Still other examples include iterators and generators, which are discuss below. + +When given such a collection of numbers, `sum` will look at the elements one by one and add them together. +The result is a single number. + +```python +numbers = range(1, 100 + 1) # 1, 2, …, 100 +sum(numbers) +# ⟹ 5050 +``` + +Had the highlighted solution not used `sum`, it might have looked like this: + +```python +def sum_of_multiples(limit, factors): + is_multiple = lambda n: any(n % f == 0 for f in factors if f != 0) + total = 0 + for multiple in filter(is_multiple, range(limit)): + total += total + return total +``` + + +### Built-in function: `filter` + +Selecting elements of a collection for having a certain property is also a very common operation. +Therefore, Python provides the built-in function [`filter`][builtin-filter]. + +`filter` takes two arguments. +The first is a **predicate**. +The second is the iterable the elements of which should be filtered. + +A predicate is a function that takes one argument (of any particular type) and returns a `bool`. +Such functions are commonly used to encode properties of values. 
+An example is `str.isupper`, which takes a `str` and returns `True` whenever it is uppercase: + +```python +str.isupper("AAAAH! 😱") # ⟹ True +str.isupper("Eh? 😕") # ⟹ False +str.isupper("⬆️💼") # ⟹ False +``` + +Thus, the function `str.isupper` represents the property of _being an uppercase string_. + +Contrary to what you might expect, `filter` does not return a data structure like the one given as an argument: + +```python +filter(str.isupper, ["THUNDERBOLTS", "and", "LIGHTNING"]) +# ⟹ <filter object at 0x…> +``` + +Instead, it returns an **iterator**. + +An iterator is an object whose sole purpose is to guide iteration through some data structure. +In particular, `filter` makes sure that elements that do not satisfy the predicate are skipped. +It is a bit like a cursor that can move only to the right. + +The main differences between containers (such as `list`s) and iterators are + +- Containers can, depending on their contents, take up a lot of space in memory, but iterators are generally very small (regardless of how many elements they 'contain'). +- Containers can be iterated over multiple times, but iterators can be used only once. + +To illustrate the latter difference: + +```python +is_even = lambda n: n % 2 == 0 +numbers = range(20) # 0, 1, …, 19 +even_numbers = filter(is_even, numbers) # 0, 2, …, 18 +sum(numbers) # ⟹ 190 +sum(numbers) # ⟹ 190 +sum(even_numbers) # ⟹ 90 +sum(even_numbers) # ⟹ 0 +``` + +Here, `sum` iterates over both `numbers` and `even_numbers` twice. + +In the case of `numbers` everything is fine. +Even after looping through the whole of `numbers`, all its elements are still there, and so `sum` can ask to see them again without problem. + +The situation with `even_numbers` is move involved. +To use the _cursor_ analogy: after going through all of `even_number`'s 'elements' – actually elements of `numbers` – the cursor has moved all the way to the right. +It cannot move backwards, so if you wish to iterate over all even numbers then you need a new cursor.
+We say the the `even_numbers` iterator is _exhausted_. When `sum` asks for its elements again, `even_numbers` comes up empty and so `sum` returns `0`. + +Had the highlighted solution not used `filter`, it might have looked like this: + +```python +def sum_of_multiples(limit, factors): + is_multiple = lambda n: any(n % f == 0 for f in factors if f != 0) + multiples = [candidate for candidate in range(limit) if is_multiple(candidate)] + return sum(multiples) +``` + +This variant stores all the multiples in a `list` before summing them. +Such a list can potentially be very big. +For example, if `limit = 1_000_000_000` and `factors = [1]` then `multiples` will be a list 8 gigabytes large! +It is to avoid unnecessarily creating such large intermediate data structures that iterators are often used. + + +### A function expression: `lambda` + +... + + +### Built-in function: `any` + +... + + +### A generator expression + +... + + +## Reflections on this approach + +An important advantage of this approach is that it is very easy to understand. +However, it suffers from potentially performing a lot of unnecessary work, for example when all `factors` are large, or when there are no `factors` at all. 
+ + + + +[builtin-sum]: https://docs.python.org/3/library/functions.html#sum "Built-in Functions: sum" +[builtin-filter]: https://docs.python.org/3/library/functions.html#filter "Built-in Functions: filter" diff --git a/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/snippet.txt b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/snippet.txt new file mode 100644 index 0000000000..e69e2d9d5a --- /dev/null +++ b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/snippet.txt @@ -0,0 +1,3 @@ +def sum_of_multiples(limit, factors): + is_multiple = lambda n: any([n % f == 0 for f in factors if f != 0]) + return sum(filter(is_multiple, range(limit))) diff --git a/exercises/practice/sum-of-multiples/.approaches/introduction.md b/exercises/practice/sum-of-multiples/.approaches/introduction.md index ee5333acf3..cbc12071ec 100644 --- a/exercises/practice/sum-of-multiples/.approaches/introduction.md +++ b/exercises/practice/sum-of-multiples/.approaches/introduction.md @@ -19,9 +19,16 @@ def sum_of_multiples(limit, factors): return sum(filter(is_multiple, range(limit))) ``` -Egregious performance when multiples are few. +Probably the most straightforward way of solving this problem is to -... +1. look at every individual integer between `0` and `limit`, +2. check that it is a multiple of any of the given `factors`, and +3. add it to the sum when it is. + +An important advantage of this approach is that it is very easy to understand. +However, it suffers from potentially performing a lot of unnecessary work, for example when all `factors` are large, or when there are no `factors` at all. + +[Read more about this approach][filter-for-multiples]. @@ -107,3 +114,7 @@ This approach saves on a lot of iteration, but is still vulnerable to excessive Fortunately it can be combined with the generator merging approach. ... 
+ + + +[filter-for-multiples]: https://exercism.org/tracks/python/exercises/sum-of-multiples/approaches/filter-for-multiples "Approach: filter for multiples" From cc51a7a65e02f4f0972546b043c1f316c5246f3e Mon Sep 17 00:00:00 2001 From: Matthijs Blom <19817960+MatthijsBlom@users.noreply.github.com> Date: Sat, 1 Apr 2023 01:48:02 +0200 Subject: [PATCH 3/4] Continued work on approach: filter for multiples --- .../filter-for-multiples/content.md | 109 +++++++++++++++--- 1 file changed, 93 insertions(+), 16 deletions(-) diff --git a/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md index 2dd452f2a5..49b13b1049 100644 --- a/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md +++ b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md @@ -24,21 +24,20 @@ Therefore, Python provides the built-in function [`sum`][builtin-sum]. A value is iterable whenever it makes sense to use it in a `for` loop like this: ```python -for _ in iterable_value: # 👈 +for element in iterable_value: # 👈 ... ``` The `list` is the most commonly used iterable data structure. Many other containers are also iterable, such as `set`s, `tuple`s, `range`s, and even `dict`s and `str`ings. -Still other examples include iterators and generators, which are discuss below. +Still other examples include iterators and generators, which are discussed below. -When given such a collection of numbers, `sum` will look at the elements one by one and add them together. +When given a collection of numbers, `sum` will look at the elements one by one and add them up. The result is a single number. 
```python numbers = range(1, 100 + 1) # 1, 2, …, 100 -sum(numbers) -# ⟹ 5050 +sum(numbers) # ⟹ 5050 ``` Had the highlighted solution not used `sum`, it might have looked like this: @@ -48,7 +47,7 @@ def sum_of_multiples(limit, factors): is_multiple = lambda n: any(n % f == 0 for f in factors if f != 0) total = 0 for multiple in filter(is_multiple, range(limit)): - total += total + total += multiple return total ``` @@ -74,7 +73,7 @@ str.isupper("⬆️💼") # ⟹ False Thus, the function `str.isupper` represents the property of _being an uppercase string_. -Contrary to what you might expect, `filter` does not return a data structure like the one given as an argument: +Contrary to what you might expect, `filter` does not return a data structure like the one given as the iterable argument: ```python filter(str.isupper, ["THUNDERBOLTS", "and", "LIGHTNING"]) @@ -84,12 +83,21 @@ filter(str.isupper, ["THUNDERBOLTS", "and", "LIGHTNING"]) Instead, it returns an **iterator**. An iterator is an object whose sole purpose is to guide iteration through some data structure. -In particular, `filter` makes sure that elements that do not satisfy the predicate are skipped. -It is a bit like a cursor that can move only to the right. +In particular, `filter` makes sure that elements that do not satisfy the predicate are skipped: + +```python +for word in filter(str.isupper, ["THUNDERBOLTS", "and", "LIGHTNING"]): + print(word) +# prints: +# THUNDERBOLTS +# LIGHTNING +``` + +An iterator is a bit like a cursor that can move only to the right. The main differences between containers (such as `list`s) and iterators are -- Containers can, depending on their contents, take up a lot of space in memory, but iterators are generally very small (regardless of how many elements they 'contain'). +- Containers can, depending on their contents, take up a lot of space in memory, but iterators are typically very small regardless of how many elements they 'contain'. 
- Containers can be iterated over multiple times, but iterators can be used only once. To illustrate the latter difference: @@ -109,10 +117,10 @@ Here, `sum` iterates over both `numbers` and `even_numbers` twice. In the case of `numbers` everything is fine. Even after looping through the whole of `numbers`, all its elements are still there, and so `sum` can ask to see them again without problem. -The situation with `even_numbers` is move involved. +The situation with `even_numbers` is less simple. To use the _cursor_ analogy: after going through all of `even_number`'s 'elements' – actually elements of `numbers` – the cursor has moved all the way to the right. -It cannot move backwards, so if you wish to iterate over all even numbers then you need a new cursor. -We say the the `even_numbers` iterator is _exhausted_. When `sum` asks for its elements again, `even_numbers` comes up empty and so `sum` returns `0`. +It cannot move backwards, so if you wish to iterate over all even numbers again then you need a new cursor. +We say that the `even_numbers` iterator is _exhausted_. When `sum` asks for its elements again, `even_numbers` comes up empty and so `sum` returns `0`. Had the highlighted solution not used `filter`, it might have looked like this: @@ -124,14 +132,83 @@ def sum_of_multiples(limit, factors): ``` This variant stores all the multiples in a `list` before summing them. -Such a list can potentially be very big. -For example, if `limit = 1_000_000_000` and `factors = [1]` then `multiples` will be a list 8 gigabytes large! +Such a list can become very big. +For example, if `limit = 1_000_000_000` and `factors = [1]` then `multiples` will take up 8 gigabytes of memory! It is to avoid unnecessarily creating such large intermediate data structures that iterators are often used. ### A function expression: `lambda` -... 
+Typically, when using higher-order functions like `filter` and `map`, the function to pass as an argument does not yet exist and needs to be defined first. + +The standard way of defining functions is through the `def` statement: + +```python +def name(parameters): + statements +``` + +Downsides of this construct include + +- the syntax can be a bit bulky +- it requires coming up with a fresh name + +These qualities can be quite bothersome when you just need a simple function of no particular significance for single use only. +In situations like this you might like to use a **lambda expression** instead. + +A lambda expression is a specific kind of expression that evaluates to a function. +It looks like this: + +```python +lambda parameters: expression # general form +lambda a, b, x: a * x + b # specific example +``` + +This latter lambda expression evaluates to a function that takes three arguments (`a`, `b`, `x`) and returns the value `a * x + b`. +Except for not having a name, it is equivalent to the function defined by + +```python +def some_name(a, b, x): + return a * x + b +``` + +A lambda expression need not necessarily be passed as an argument. +It can also be applied to arguments immediately, or assigned to a variable: + +```python +lambda a, b, x: a * x + b +# ⟹ <function <lambda> at 0x000001F36A274CC0> + +(lambda a, b, x: a * x + b)(2, 3, 5) +# ⟹ 13 + +some_function = lambda a, b, x: a * x + b +some_function(2, 3, 5) +# ⟹ 13 + +list(filter( + lambda s: len(s) <= 3, + ["aaaa", "b", "ccccc", "dd", "eee"] +)) +# ⟹ ['b', 'dd', 'eee'] +``` + +Only functions that can be defined using a single (`return`) statement can be written as a lambda expression. +If you need multiple statements, you have no choice but to use `def`. + +The solution highlighted above assigns a lambda expression to a variable: `is_multiple`. +Some people consider this to be unidiomatic and feel one should always use `def` when a function is to have a name.
+A lambda expression is used here anyway to demonstrate the feature, and also because the author prefers its compactness. + +Had the highlighted solution not used `lambda`, it might have looked like this: + +```python +def sum_of_multiples(limit, factors): + def is_multiple(n): + return any(n % f == 0 for f in factors if f != 0) + + return sum(filter(is_multiple, range(limit))) +``` ### Built-in function: `any` From 5dbe577c662965ffbc7b0d24ebeb1dd289e7043b Mon Sep 17 00:00:00 2001 From: Matthijs Blom <19817960+MatthijsBlom@users.noreply.github.com> Date: Sun, 2 Apr 2023 15:45:10 +0200 Subject: [PATCH 4/4] Avoid encouraging assigning lambda's to variables --- .../filter-for-multiples/content.md | 40 ++++++++++++++----- .../filter-for-multiples/snippet.txt | 6 ++- .../.approaches/introduction.md | 6 ++- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md index 49b13b1049..c1dee79a6a 100644 --- a/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md +++ b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/content.md @@ -2,8 +2,10 @@ ```python def sum_of_multiples(limit, factors): - is_multiple = lambda n: any(n % f == 0 for f in factors if f != 0) - return sum(filter(is_multiple, range(limit))) + return sum(filter( + lambda n: any(n % f == 0 for f in factors if f != 0), + range(limit) + )) ``` Probably the most straightforward way of solving this problem is to @@ -44,9 +46,11 @@ Had the highlighted solution not used `sum`, it might have looked like this: ```python def sum_of_multiples(limit, factors): - is_multiple = lambda n: any(n % f == 0 for f in factors if f != 0) + multiples = filter( + lambda n: any(n % f == 0 for f in factors if f != 0), + range(limit)) total = 0 - for multiple in filter(is_multiple, range(limit)): + for multiple in multiples: 
total += multiple return total ``` @@ -103,7 +107,9 @@ The main differences between containers (such as `list`s) and iterators are To illustrate the latter difference: ```python -is_even = lambda n: n % 2 == 0 +def is_even(n): + return n % 2 == 0 + numbers = range(20) # 0, 1, …, 19 even_numbers = filter(is_even, numbers) # 0, 2, …, 18 sum(numbers) # ⟹ 190 @@ -126,7 +132,9 @@ Had the highlighted solution not used `filter`, it might have looked like this: ```python def sum_of_multiples(limit, factors): - is_multiple = lambda n: any(n % f == 0 for f in factors if f != 0) + def is_multiple(n): + return any(n % f == 0 for f in factors if f != 0) + multiples = [candidate for candidate in range(limit) if is_multiple(candidate)] return sum(multiples) ``` @@ -193,13 +201,25 @@ list(filter( # ⟹ ['b', 'dd', 'eee'] ``` +~~~~exercism/note +Immediately applying a lambda expression is possible, but generally pointless: + +```python +# Instead of +(lambda a, b, x: a * x + b)(2, 3, y) +# you might as well write +2 * y + 3 +``` +~~~~ + +~~~~exercism/caution +Assigning lambda expressions to variables is unidiomatic. +When you want to give a lambda expression a name, use `def` instead. +~~~~ + Only functions that can be defined using a single (`return`) statement can be written as a lambda expression. If you need multiple statements, you have no choice but to use `def`. -The solution highlighted above assigns a lambda expression to a variable: `is_multiple`. -Some people consider this to be unidiomatic and feel one should always use `def` when a function is to have a name. -A lambda expression is used here anyway to demonstrate the feature, and also because the author prefers its compactness.
- Had the highlighted solution not used `lambda`, it might have looked like this: ```python diff --git a/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/snippet.txt b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/snippet.txt index e69e2d9d5a..f5a419a212 100644 --- a/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/snippet.txt +++ b/exercises/practice/sum-of-multiples/.approaches/filter-for-multiples/snippet.txt @@ -1,3 +1,5 @@ def sum_of_multiples(limit, factors): - is_multiple = lambda n: any([n % f == 0 for f in factors if f != 0]) - return sum(filter(is_multiple, range(limit))) + return sum(filter( + lambda n: any(n % f == 0 for f in factors if f != 0), + range(limit) + )) diff --git a/exercises/practice/sum-of-multiples/.approaches/introduction.md b/exercises/practice/sum-of-multiples/.approaches/introduction.md index cbc12071ec..d06c9ad2ed 100644 --- a/exercises/practice/sum-of-multiples/.approaches/introduction.md +++ b/exercises/practice/sum-of-multiples/.approaches/introduction.md @@ -15,8 +15,10 @@ Several possible approaches to this exercise: ```python def sum_of_multiples(limit, factors): - is_multiple = lambda n: any(n % f == 0 for f in factors if f != 0) - return sum(filter(is_multiple, range(limit))) + return sum(filter( + lambda n: any(n % f == 0 for f in factors if f != 0), + range(limit) + )) ``` Probably the most straightforward way of solving this problem is to