about summary refs log tree commit diff stats
path: root/js/games/nluqo.github.io/~bh/61a-pages/Lectures/mapreduce-demo.scm
blob: 70a0035ad7d6c1e082f53d2735d071ff5a555e40 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
;; CS 61A Mapreduce lecture demos

;; Count how many times any word is used in the title of a Beatles song:

(define (wordcount-mapper document-line-kv-pair)
  (map (lambda (wd-in-line) (make-kv-pair wd-in-line 1))
       (kv-value document-line-kv-pair)))

(define wordcounts (mapreduce wordcount-mapper + 0 "/beatles-songs"))

(ss wordcounts)

;; How to examine a distributed file

(ss (mapreduce list cons-stream the-empty-stream "/beatles-songs"))

;; Find the most commonly used word in any Beatles title.
;; (This actually gets the most commonly used word per initial letter;
;; another reduction on a single machine is needed to get the absolutely
;; most commonly used word.)

(define (find-max-mapper kv-pair)
  (list (make-kv-pair (first (kv-key kv-pair))
		      kv-pair)))

(define (find-max-reducer current so-far)
  (if (> (kv-value current) (kv-value so-far))
      current
      so-far))

(define frequent (mapreduce find-max-mapper find-max-reducer
			    (make-kv-pair 'foo 0) wordcounts))

(ss frequent)

(stream-accumulate find-max-reducer (make-kv-pair 'foo 0)
		   (stream-map kv-value frequent))

;; Find the total number of lines of text in all of Shakespeare:

(define will (mapreduce (lambda (kv-pair) (list (make-kv-pair 'line 1)))
			+ 0 "/gutenberg/shakespeare"))


;; This is the desired result from the modification of the above program
;; to find line counts per play.

'((a-midsummer-nights-dream . 2456) (alls-well-that-ends-well . 3231)
  (henry-v . 3643) (loves-labours-lost . 3015) (macbeth . 2900)
  (the-taming-of-the-shrew . 3033) (the-tempest . 2644) (king-john . 3028)
  (the-merry-wives-of-windsor . 3152) (the-winters-tale . 3595)
  (timon-of-athens . 2852) (troilus-and-cressida . 3983)
  (as-you-like-it . 2974) (cymbeline . 4164) (henry-iv-1 . 3355)
  (henry-vi-1 . 3409) (king-lear . 3985) (sonnets . 2626)
  (the-tragedy-of-antony-and-cleopatra . 4175)
  (the-tragedy-of-coriolanus . 4277) (the-two-gentlemen-of-verona . 2559)
  (a-lovers-complaint . 385) (hamlet . 4553) (henry-iv-2 . 3587)
  (henry-vi-2 . 3646) (measure-for-measure . 3119) (othello . 3897)
  (richard-iii . 4541) (twelfth-night . 2801) (henry-vi-3 . 3524)
  (henry-viii . 3765) (much-ado-about-nothing . 2789)
  (romeo-and-juliet . 3620) (the-duke-of-venice . 2968))