about summary refs log tree commit diff stats
path: root/js/games/nluqo.github.io/~bh/61a-pages/Lectures/mapreduce-demo.scm
blob: 70a0035ad7d6c1e082f53d2735d071ff5a555e40 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
;; CS 61A Mapreduce lecture demos

;; Count how many times any word is used in the title of a Beatles song:

;; Mapper for the word-count job.
;; DOCUMENT-LINE-KV-PAIR is one input kv-pair; its value is treated as a
;; list of words.  Returns a list holding, for every word in that value,
;; a kv-pair keyed by the word with the count 1.
(define (wordcount-mapper document-line-kv-pair)
  (define (tally-one word)
    (make-kv-pair word 1))
  (map tally-one (kv-value document-line-kv-pair)))

;; Run the word-count job on the distributed file "/beatles-songs":
;; wordcount-mapper is the mapper, + is the reducer, and 0 is the reducer's
;; starting value.  (Presumably mapreduce groups the mapper's kv-pairs by
;; key before reducing -- confirm against the mapreduce documentation.)
(define wordcounts (mapreduce wordcount-mapper + 0 "/beatles-songs"))

;; Display the resulting stream of per-word counts.
(ss wordcounts)

;; How to examine a distributed file

;; Pass-through job: `list` as the mapper wraps each input kv-pair in a
;; one-element list, and cons-stream with the-empty-stream as the reducer
;; and starting value collects values into a stream instead of combining
;; them, so the file's contents can be displayed.
(ss (mapreduce list cons-stream the-empty-stream "/beatles-songs"))

;; Find the most commonly used word in any Beatles title.
;; (The mapreduce job actually yields the most commonly used word per
;; initial letter; one more reduction, done on a single machine, is needed
;; to get the overall most commonly used word.)

;; Mapper for the most-frequent-word job.
;; KV-PAIR is one input kv-pair.  Returns a one-element list containing a
;; new kv-pair whose key is `first` of the input's key and whose value is
;; the entire input kv-pair.
(define (find-max-mapper kv-pair)
  (let ((initial (first (kv-key kv-pair))))
    (list (make-kv-pair initial kv-pair))))

;; Reducer for the most-frequent-word job.
;; CURRENT and SO-FAR are both kv-pairs with numeric values.  Returns
;; CURRENT only when its value is strictly greater than SO-FAR's; on a tie
;; SO-FAR is kept.
(define (find-max-reducer current so-far)
  (cond ((> (kv-value current) (kv-value so-far)) current)
        (else so-far)))

;; Second job, fed by the wordcounts stream rather than a file name.
;; find-max-mapper re-keys each (word, count) pair by `first` of the word,
;; and find-max-reducer keeps the pair with the largest count.  The dummy
;; pair (foo, 0) is the reducer's starting value.
(define frequent (mapreduce find-max-mapper find-max-reducer
			    (make-kv-pair 'foo 0) wordcounts))

;; Display the resulting stream.
(ss frequent)

;; Final reduction, done locally rather than through mapreduce: extract the
;; value of every entry in `frequent` (each value is itself a kv-pair, as
;; built by find-max-mapper) and fold them with find-max-reducer, starting
;; from the dummy pair (foo, 0), to get the single largest-count pair.
(stream-accumulate find-max-reducer (make-kv-pair 'foo 0)
		   (stream-map kv-value frequent))

;; Find the total number of lines of text in all of Shakespeare:

;; The mapper ignores the content of its input kv-pair and emits a single
;; kv-pair with the constant key `line` and value 1; the reducer + with
;; starting value 0 adds those 1s up.  Input is "/gutenberg/shakespeare".
(define will (mapreduce (lambda (kv-pair) (list (make-kv-pair 'line 1)))
			+ 0 "/gutenberg/shakespeare"))


;; This is the desired result from the modification of the above program
;; to find line counts per play.

;; Quoted list of (name . count) dotted pairs; it is evaluated as a literal
;; and not bound to any name.
;; NOTE(review): the key `the-duke-of-venice` does not look like a play
;; title -- presumably it mirrors a file name in the data set; confirm.
'((a-midsummer-nights-dream . 2456) (alls-well-that-ends-well . 3231)
  (henry-v . 3643) (loves-labours-lost . 3015) (macbeth . 2900)
  (the-taming-of-the-shrew . 3033) (the-tempest . 2644) (king-john . 3028)
  (the-merry-wives-of-windsor . 3152) (the-winters-tale . 3595)
  (timon-of-athens . 2852) (troilus-and-cressida . 3983)
  (as-you-like-it . 2974) (cymbeline . 4164) (henry-iv-1 . 3355)
  (henry-vi-1 . 3409) (king-lear . 3985) (sonnets . 2626)
  (the-tragedy-of-antony-and-cleopatra . 4175)
  (the-tragedy-of-coriolanus . 4277) (the-two-gentlemen-of-verona . 2559)
  (a-lovers-complaint . 385) (hamlet . 4553) (henry-iv-2 . 3587)
  (henry-vi-2 . 3646) (measure-for-measure . 3119) (othello . 3897)
  (richard-iii . 4541) (twelfth-night . 2801) (henry-vi-3 . 3524)
  (henry-viii . 3765) (much-ado-about-nothing . 2789)
  (romeo-and-juliet . 3620) (the-duke-of-venice . 2968))
"na">color="#ffffff" face="helvetica, arial"><big><strong>Classes</strong></big></font></td></tr> <tr><td bgcolor="#ee77aa"><tt>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt></td><td>&nbsp;</td> <td width="100%"><dl> <dt><font face="helvetica, arial"><a href="builtins.html#object">builtins.object</a> </font></dt><dd> <dl> <dt><font face="helvetica, arial"><a href="ranger.gui.widgets.statusbar.html#Message">Message</a> </font></dt></dl> </dd> <dt><font face="helvetica, arial"><a href="ranger.gui.widgets.html#Widget">ranger.gui.widgets.Widget</a>(<a href="ranger.gui.displayable.html#Displayable">ranger.gui.displayable.Displayable</a>) </font></dt><dd> <dl> <dt><font face="helvetica, arial"><a href="ranger.gui.widgets.statusbar.html#StatusBar">StatusBar</a> </font></dt></dl> </dd> </dl> <p> <table width="100%" cellspacing=0 cellpadding=2 border=0 summary="section"> <tr bgcolor="#ffc8d8"> <td colspan=3 valign=bottom>&nbsp;<br> <font color="#000000" face="helvetica, arial"><a name="Message">class <strong>Message</strong></a>(<a href="builtins.html#object">builtins.object</a>)</font></td></tr> <tr><td bgcolor="#ffc8d8"><tt>&nbsp;&nbsp;&nbsp;</tt></td><td>&nbsp;</td> <td width="100%">Methods defined here:<br> <dl><dt><a name="Message-__init__"><strong>__init__</strong></a>(self, text, duration, bad)</dt></dl> <dl><dt><a name="Message-is_alive"><strong>is_alive</strong></a>(self)</dt></dl> <hr> Data descriptors defined here:<br> <dl><dt><strong>__dict__</strong></dt> <dd><tt>dictionary&nbsp;for&nbsp;instance&nbsp;variables&nbsp;(if&nbsp;defined)</tt></dd> </dl> <dl><dt><strong>__weakref__</strong></dt> <dd><tt>list&nbsp;of&nbsp;weak&nbsp;references&nbsp;to&nbsp;the&nbsp;object&nbsp;(if&nbsp;defined)</tt></dd> </dl> <hr> Data and other attributes defined here:<br> <dl><dt><strong>bad</strong> = False</dl> <dl><dt><strong>elapse</strong> = None</dl> <dl><dt><strong>text</strong> = None</dl> </td></tr></table> <p> <table width="100%" cellspacing=0 cellpadding=2 border=0 
summary="section"> <tr bgcolor="#ffc8d8"> <td colspan=3 valign=bottom>&nbsp;<br> <font color="#000000" face="helvetica, arial"><a name="StatusBar">class <strong>StatusBar</strong></a>(<a href="ranger.gui.widgets.html#Widget">ranger.gui.widgets.Widget</a>)</font></td></tr> <tr bgcolor="#ffc8d8"><td rowspan=2><tt>&nbsp;&nbsp;&nbsp;</tt></td> <td colspan=2><tt>The&nbsp;statusbar&nbsp;displays&nbsp;information&nbsp;about&nbsp;the&nbsp;current&nbsp;file&nbsp;and&nbsp;directory.<br> &nbsp;<br> On&nbsp;the&nbsp;left&nbsp;side,&nbsp;there&nbsp;is&nbsp;a&nbsp;display&nbsp;similar&nbsp;to&nbsp;what&nbsp;"ls&nbsp;-l"&nbsp;would<br> print&nbsp;for&nbsp;the&nbsp;current&nbsp;file.&nbsp;&nbsp;The&nbsp;right&nbsp;side&nbsp;shows&nbsp;directory&nbsp;information<br> such&nbsp;as&nbsp;the&nbsp;space&nbsp;used&nbsp;by&nbsp;all&nbsp;the&nbsp;files&nbsp;in&nbsp;this&nbsp;directory.<br>&nbsp;</tt></td></tr> <tr><td>&nbsp;</td> <td width="100%"><dl><dt>Method resolution order:</dt> <dd><a href="ranger.gui.widgets.statusbar.html#StatusBar">StatusBar</a></dd> <dd><a href="ranger.gui.widgets.html#Widget">ranger.gui.widgets.Widget</a></dd> <dd><a href="ranger.gui.displayable.html#Displayable">ranger.gui.displayable.Displayable</a></dd> <dd><a href="ranger.shared.html#EnvironmentAware">ranger.shared.EnvironmentAware</a></dd> <dd><a href="ranger.shared.html#FileManagerAware">ranger.shared.FileManagerAware</a></dd> <dd><a href="ranger.shared.html#Awareness">ranger.shared.Awareness</a></dd> <dd><a href="ranger.gui.curses_shortcuts.html#CursesShortcuts">ranger.gui.curses_shortcuts.CursesShortcuts</a></dd> <dd><a href="ranger.shared.settings.html#SettingsAware">ranger.shared.settings.SettingsAware</a></dd> <dd><a href="builtins.html#object">builtins.object</a></dd> </dl> <hr> Methods defined here:<br> <dl><dt><a name="StatusBar-__init__"><strong>__init__</strong></a>(self, win, column<font color="#909090">=None</font>)</dt></dl> <dl><dt><a 
name="StatusBar-draw"><strong>draw</strong></a>(self)</dt><dd><tt>Draw&nbsp;the&nbsp;statusbar</tt></dd></dl> <dl><dt><a name="StatusBar-notify"><strong>notify</strong></a>(self, text, duration<font color="#909090">=4</font>, bad<font color="#909090">=False</font>)</dt></dl> <dl><dt><a name="StatusBar-request_redraw"><strong>request_redraw</strong></a>(self)</dt></dl> <hr> Data and other attributes defined here:<br> <dl><dt><strong>groups</strong> = {}</dl> <dl><dt><strong>hint</strong> = None</dl> <dl><dt><strong>msg</strong> = None</dl> <dl><dt><strong>old_cf</strong> = None</dl> <dl><dt><strong>old_du</strong> = None</dl> <dl><dt><strong>old_hint</strong> = None</dl> <dl><dt><strong>old_mtime</strong> = None</dl> <dl><dt><strong>owners</strong> = {}</dl> <dl><dt><strong>result</strong> = None</dl> <dl><dt><strong>timeformat</strong> = '%Y-%m-%d %H:%M'</dl> <hr> Methods inherited from <a href="ranger.gui.displayable.html#Displayable">ranger.gui.displayable.Displayable</a>:<br> <dl><dt><a name="StatusBar-__bool__"><strong>__bool__</strong></a> = __nonzero__(self)</dt><dd><tt>Always&nbsp;True</tt></dd></dl> <dl><dt><a name="StatusBar-__contains__"><strong>__contains__</strong></a>(self, item)</dt><dd><tt>Is&nbsp;item&nbsp;inside&nbsp;the&nbsp;boundaries?<br> item&nbsp;can&nbsp;be&nbsp;an&nbsp;iterable&nbsp;like&nbsp;[y,&nbsp;x]&nbsp;or&nbsp;an&nbsp;<a href="builtins.html#object">object</a>&nbsp;with&nbsp;x&nbsp;and&nbsp;y&nbsp;methods.</tt></dd></dl> <dl><dt><a name="StatusBar-__nonzero__"><strong>__nonzero__</strong></a>(self)</dt><dd><tt>Always&nbsp;True</tt></dd></dl> <dl><dt><a name="StatusBar-__str__"><strong>__str__</strong></a>(self)</dt></dl> <dl><dt><a name="StatusBar-click"><strong>click</strong></a>(self, event)</dt><dd><tt>Called&nbsp;when&nbsp;a&nbsp;mouse&nbsp;key&nbsp;is&nbsp;pressed&nbsp;and&nbsp;self.<strong>focused</strong>&nbsp;is&nbsp;True.<br> Override&nbsp;this!</tt></dd></dl> <dl><dt><a 
name="StatusBar-contains_point"><strong>contains_point</strong></a>(self, y, x)</dt><dd><tt>Test&nbsp;whether&nbsp;the&nbsp;point&nbsp;(with&nbsp;absolute&nbsp;coordinates)&nbsp;lies<br> within&nbsp;the&nbsp;boundaries&nbsp;of&nbsp;this&nbsp;<a href="builtins.html#object">object</a>.</tt></dd></dl> <dl><dt><a name="StatusBar-destroy"><strong>destroy</strong></a>(self)</dt><dd><tt>Called&nbsp;when&nbsp;the&nbsp;<a href="builtins.html#object">object</a>&nbsp;is&nbsp;destroyed.<br> Override&nbsp;this!</tt></dd></dl> <dl><dt><a name="StatusBar-finalize"><strong>finalize</strong></a>(self)</dt><dd><tt>Called&nbsp;after&nbsp;every&nbsp;displayable&nbsp;is&nbsp;done&nbsp;drawing.<br> Override&nbsp;this!</tt></dd></dl> <dl><dt><a name="StatusBar-poke"><strong>poke</strong></a>(self)</dt><dd><tt>Called&nbsp;before&nbsp;drawing,&nbsp;even&nbsp;if&nbsp;invisible</tt></dd></dl> <dl><dt><a name="StatusBar-press"><strong>press</strong></a>(self, key)</dt><dd><tt>Called&nbsp;when&nbsp;a&nbsp;key&nbsp;is&nbsp;pressed&nbsp;and&nbsp;self.<strong>focused</strong>&nbsp;is&nbsp;True.<br> Override&nbsp;this!</tt></dd></dl> <dl><dt><a name="StatusBar-resize"><strong>resize</strong></a>(self, y, x, hei<font color="#909090">=None</font>, wid<font color="#909090">=None</font>)</dt><dd><tt>Resize&nbsp;the&nbsp;widget</tt></dd></dl> <hr> Data and other attributes inherited from <a href="ranger.shared.html#EnvironmentAware">ranger.shared.EnvironmentAware</a>:<br> <dl><dt><strong>env</strong> = None</dl> <hr> Data and other attributes inherited from <a href="ranger.shared.html#FileManagerAware">ranger.shared.FileManagerAware</a>:<br> <dl><dt><strong>fm</strong> = None</dl> <hr> Data descriptors inherited from <a href="ranger.shared.html#Awareness">ranger.shared.Awareness</a>:<br> <dl><dt><strong>__dict__</strong></dt> <dd><tt>dictionary&nbsp;for&nbsp;instance&nbsp;variables&nbsp;(if&nbsp;defined)</tt></dd> </dl> <dl><dt><strong>__weakref__</strong></dt> 
<dd><tt>list&nbsp;of&nbsp;weak&nbsp;references&nbsp;to&nbsp;the&nbsp;object&nbsp;(if&nbsp;defined)</tt></dd> </dl> <hr> Methods inherited from <a href="ranger.gui.curses_shortcuts.html#CursesShortcuts">ranger.gui.curses_shortcuts.CursesShortcuts</a>:<br> <dl><dt><a name="StatusBar-addch"><strong>addch</strong></a>(self, *args)</dt></dl> <dl><dt><a name="StatusBar-addnstr"><strong>addnstr</strong></a>(self, *args)</dt></dl> <dl><dt><a name="StatusBar-addstr"><strong>addstr</strong></a>(self, *args)</dt></dl> <dl><dt><a name="StatusBar-color"><strong>color</strong></a>(self, *keys)</dt><dd><tt>Change&nbsp;the&nbsp;colors&nbsp;from&nbsp;now&nbsp;on.</tt></dd></dl> <dl><dt><a name="StatusBar-color_at"><strong>color_at</strong></a>(self, y, x, wid, *keys)</dt><dd><tt>Change&nbsp;the&nbsp;colors&nbsp;at&nbsp;the&nbsp;specified&nbsp;position</tt></dd></dl> <dl><dt><a name="StatusBar-color_reset"><strong>color_reset</strong></a>(self)</dt><dd><tt>Change&nbsp;the&nbsp;colors&nbsp;to&nbsp;the&nbsp;default&nbsp;colors</tt></dd></dl> <hr> Data and other attributes inherited from <a href="ranger.shared.settings.html#SettingsAware">ranger.shared.settings.SettingsAware</a>:<br> <dl><dt><strong>settings</strong> = {}</dl> </td></tr></table></td></tr></table><p> <table width="100%" cellspacing=0 cellpadding=2 border=0 summary="section"> <tr bgcolor="#eeaa77"> <td colspan=3 valign=bottom>&nbsp;<br> <font color="#ffffff" face="helvetica, arial"><big><strong>Functions</strong></big></font></td></tr> <tr><td bgcolor="#eeaa77"><tt>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt></td><td>&nbsp;</td> <td width="100%"><dl><dt><a name="-getgrgid"><strong>getgrgid</strong></a>(...)</dt><dd><tt><a href="#-getgrgid">getgrgid</a>(id)&nbsp;-&gt;&nbsp;tuple<br> Return&nbsp;the&nbsp;group&nbsp;database&nbsp;entry&nbsp;for&nbsp;the&nbsp;given&nbsp;numeric&nbsp;group&nbsp;ID.&nbsp;&nbsp;If<br> id&nbsp;is&nbsp;not&nbsp;valid,&nbsp;raise&nbsp;KeyError.</tt></dd></dl> <dl><dt><a 
name="-getpwuid"><strong>getpwuid</strong></a>(...)</dt><dd><tt><a href="#-getpwuid">getpwuid</a>(uid)&nbsp;-&gt;&nbsp;(pw_name,pw_passwd,pw_uid,<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;pw_gid,pw_gecos,pw_dir,pw_shell)<br> Return&nbsp;the&nbsp;password&nbsp;database&nbsp;entry&nbsp;for&nbsp;the&nbsp;given&nbsp;numeric&nbsp;user&nbsp;ID.<br> See&nbsp;pwd.__doc__&nbsp;for&nbsp;more&nbsp;on&nbsp;password&nbsp;database&nbsp;entries.</tt></dd></dl> <dl><dt><a name="-getuid"><strong>getuid</strong></a>(...)</dt><dd><tt><a href="#-getuid">getuid</a>()&nbsp;-&gt;&nbsp;uid<br> &nbsp;<br> Return&nbsp;the&nbsp;current&nbsp;process's&nbsp;user&nbsp;id.</tt></dd></dl> <dl><dt><a name="-localtime"><strong>localtime</strong></a>(...)</dt><dd><tt><a href="#-localtime">localtime</a>([seconds])&nbsp;-&gt;&nbsp;(tm_year,tm_mon,tm_mday,tm_hour,tm_min,<br> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;tm_sec,tm_wday,tm_yday,tm_isdst)<br> &nbsp;<br> Convert&nbsp;seconds&nbsp;since&nbsp;the&nbsp;Epoch&nbsp;to&nbsp;a&nbsp;time&nbsp;tuple&nbsp;expressing&nbsp;local&nbsp;time.<br> When&nbsp;'seconds'&nbsp;is&nbsp;not&nbsp;passed&nbsp;in,&nbsp;convert&nbsp;the&nbsp;current&nbsp;time&nbsp;instead.</tt></dd></dl> <dl><dt><a name="-readlink"><strong>readlink</strong></a>(...)</dt><dd><tt><a href="#-readlink">readlink</a>(path)&nbsp;-&gt;&nbsp;path<br> &nbsp;<br> Return&nbsp;a&nbsp;string&nbsp;representing&nbsp;the&nbsp;path&nbsp;to&nbsp;which&nbsp;the&nbsp;symbolic&nbsp;link&nbsp;points.</tt></dd></dl> <dl><dt><a name="-strftime"><strong>strftime</strong></a>(...)</dt><dd><tt><a href="#-strftime">strftime</a>(format[,&nbsp;tuple])&nbsp;-&gt;&nbsp;string<br> &nbsp;<br> Convert&nbsp;a&nbsp;time&nbsp;tuple&nbsp;to&nbsp;a&nbsp;string&nbsp;according&nbsp;to&nbsp;a&nbsp;format&nbsp;specification.<br> 
See&nbsp;the&nbsp;library&nbsp;reference&nbsp;manual&nbsp;for&nbsp;formatting&nbsp;codes.&nbsp;When&nbsp;the&nbsp;time&nbsp;tuple<br> is&nbsp;not&nbsp;present,&nbsp;current&nbsp;time&nbsp;as&nbsp;returned&nbsp;by&nbsp;<a href="#-localtime">localtime</a>()&nbsp;is&nbsp;used.</tt></dd></dl> <dl><dt><a name="-time"><strong>time</strong></a>(...)</dt><dd><tt><a href="#-time">time</a>()&nbsp;-&gt;&nbsp;floating&nbsp;point&nbsp;number<br> &nbsp;<br> Return&nbsp;the&nbsp;current&nbsp;time&nbsp;in&nbsp;seconds&nbsp;since&nbsp;the&nbsp;Epoch.<br> Fractions&nbsp;of&nbsp;a&nbsp;second&nbsp;may&nbsp;be&nbsp;present&nbsp;if&nbsp;the&nbsp;system&nbsp;clock&nbsp;provides&nbsp;them.</tt></dd></dl> </td></tr></table> </body></html>