samples-dev/swarm/appengine/main.py - sdk.git - Git at Google

 # Copyright (c) 2011, the Dart project authors.  Please see the AUTHORS file
 # for details. All rights reserved. Use of this source code is governed by a
 # BSD-style license that can be found in the LICENSE file.

 #!/usr/bin/env python3
 #
 import re, base64, logging, pickle, httplib2, time, urlparse, urllib2, urllib, StringIO, gzip, zipfile

 from google.appengine.ext import webapp, db

 from google.appengine.api import taskqueue, urlfetch, memcache, images, users
 from google.appengine.ext.webapp.util import login_required
 from google.appengine.ext.webapp import template

 from django.utils import simplejson as json
 from django.utils.html import strip_tags

 from oauth2client.appengine import CredentialsProperty
 from oauth2client.client import OAuth2WebServerFlow

 import encoder

 # TODO(jimhug): Allow client to request desired thumb size.
 # (width, height) unpacked by generateThumbnail; Article.ensureThumbnail also
 # stores str(THUMB_SIZE) so that a changed size triggers regeneration.
 THUMB_SIZE = (57, 57)
 # Base URL of the Google Reader API; also passed as the OAuth scope.
 READER_API = 'http://www.google.com/reader/api/0'

 # Upper bound on the number of sections UserData.getEncodedData encodes.
 MAX_SECTIONS = 5
 # Upper bound on the number of articles Feed.ensureEncodedFeed encodes.
 MAX_ARTICLES = 20


 class UserData(db.Model):
     """Per-user record: OAuth credentials plus an ordered list of section keys."""
     credentials = CredentialsProperty()
     sections = db.ListProperty(db.Key)

     def getEncodedData(self, articleKeys=None):
         """Encodes the user's leading sections with encoder.Encoder.

         At most MAX_SECTIONS sections are written, preceded by their count.
         articleKeys, when given, is passed on to Section.encode, which
         extends it with the top article keys of every encoded feed.

         Returns the raw output of the encoder.
         """
         # TODO(jimhug): Only return initially visible section in first reply.
         leadingKeys = self.sections[:MAX_SECTIONS]
         out = encoder.Encoder()
         out.writeInt(len(leadingKeys))
         for entity in db.get(leadingKeys):
             entity.encode(out, articleKeys)
         return out.getRaw()


 class Section(db.Model):
     """A titled, ordered collection of feed keys."""
     title = db.TextProperty()
     feeds = db.ListProperty(db.Key)

     def fixedTitle(self):
         """Returns the portion of the title before the first underscore."""
         head, _sep, _tail = self.title.partition('_')
         return head

     def encode(self, enc, articleKeys=None):
         """Writes this section and the cached encoding of each feed to enc.

         Output order: key name, fixed title, feed count, then one raw
         encoded feed per entry.  When articleKeys is not None it is extended
         with every feed's topArticles.
         """
         # TODO(jimhug): Need to optimize format and support incremental updates.
         enc.writeString(self.key().name())
         enc.writeString(self.fixedTitle())
         enc.writeInt(len(self.feeds))
         wantKeys = articleKeys is not None
         for feedEntity in db.get(self.feeds):
             # Makes sure encodedFeed3/topArticles are populated before use.
             feedEntity.ensureEncodedFeed()
             enc.writeRaw(feedEntity.encodedFeed3)
             if wantKeys:
                 articleKeys.extend(feedEntity.topArticles)


 class Feed(db.Model):
     """A feed plus a cached encoding of its most recent articles."""
     title = db.TextProperty()
     iconUrl = db.TextProperty()
     lastUpdated = db.IntegerProperty()

     encodedFeed3 = db.TextProperty()
     topArticles = db.ListProperty(db.Key)

     def ensureEncodedFeed(self, force=False):
         """Builds and saves the cached encoding if missing, or always if force.

         Refreshes both encodedFeed3 and topArticles, then stores the entity.
         """
         if not force and self.encodedFeed3 is not None:
             return

         out = encoder.Encoder()
         keys = []
         self.encode(out, MAX_ARTICLES, keys)
         logging.info('articleSet length is %s' % len(keys))
         self.topArticles = keys
         self.encodedFeed3 = out.getRaw()
         self.put()

     def encode(self, enc, maxArticles, articleSet):
         """Writes the feed header and up to maxArticles article headers to enc.

         Articles are taken newest first (descending date); the key of every
         encoded article is appended to articleSet.
         """
         enc.writeString(self.key().name())
         enc.writeString(self.title)
         enc.writeString(self.iconUrl)

         logging.info('encoding feed: %s' % self.title)
         headers = []

         # article_set is presumably the back-reference created by
         # Article.feed (a ReferenceProperty) -- confirm against the db API.
         newestFirst = self.article_set.order('-date')
         for entry in newestFirst.fetch(limit=maxArticles):
             headers.append(entry.encodeHeader())
             articleSet.append(entry.key())

         enc.writeInt(len(headers))
         enc.writeRaw(''.join(headers))


 class Article(db.Model):
     """One feed entry, including its content and a cached thumbnail."""
     feed = db.ReferenceProperty(Feed)

     title = db.TextProperty()
     author = db.TextProperty()
     content = db.TextProperty()
     snippet = db.TextProperty()
     thumbnail = db.BlobProperty()
     thumbnailSize = db.TextProperty()
     srcurl = db.TextProperty()
     date = db.IntegerProperty()

     def ensureThumbnail(self):
         """Regenerates and saves the thumbnail if it was built for another size.

         thumbnailSize remembers str(THUMB_SIZE) from the last generation; when
         it still matches, nothing happens (and nothing is saved).
         """
         wantedSize = str(THUMB_SIZE)
         if self.thumbnailSize == wantedSize:
             return
         self.thumbnail = makeThumbnail(self.content)
         self.thumbnailSize = wantedSize
         self.put()

     def encodeHeader(self):
         """Returns the raw encoding of the article's summary fields.

         Field order: key name, title, source URL, has-thumbnail flag, author,
         date, snippet.
         """
         # TODO(jmesserly): for now always unescape until the crawler catches up
         header = encoder.Encoder()
         header.writeString(self.key().name())
         header.writeString(unescape(self.title))
         header.writeString(self.srcurl)
         hasThumbnail = self.thumbnail is not None
         header.writeBool(hasThumbnail)
         header.writeString(self.author)
         header.writeInt(self.date)
         header.writeString(unescape(self.snippet))
         return header.getRaw()


 class HtmlFile(db.Model):
     """An uploaded HTML file, stored by UpdateHtml and served by MainHandler."""
     # File bytes; gzip-compressed when `compressed` is True.
     content = db.BlobProperty()
     # Whether `content` must be gunzipped before it is served.
     compressed = db.BooleanProperty()
     # Name the file was stored under (UpdateHtml prefixes the version, if any).
     filename = db.StringProperty()
     author = db.UserProperty(auto_current_user=True)
     # TopHandler lists files ordered by this value, newest first.
     date = db.DateTimeProperty(auto_now_add=True)


 class UpdateHtml(webapp.RequestHandler):
     """Stores uploaded files as HtmlFile entities."""

     def post(self):
         """Saves every uploaded 'files' field, then redirects to '/'.

         When a 'version' request value is present the stored name becomes
         '<version>-<filename>'.  Payloads longer than 1MB - 1KB are gzipped
         before being stored and flagged as compressed.
         """
         upload_files = self.request.POST.multi.__dict__['_items']
         version = self.request.get('version')
         logging.info('files: %r' % upload_files)
         for item in upload_files:
             if item[0] != 'files':
                 continue
             upload = item[1]
             filename = upload.filename
             if version:
                 filename = '%s-%s' % (version, filename)
             logging.info('upload: %r' % filename)

             htmlFile = HtmlFile.get_or_insert(filename)
             htmlFile.filename = filename

             # If text > (1MB - 1KB) then gzip text to fit in 1MB space
             payload = upload.value
             tooBig = len(payload) > 1024 * 1023
             if tooBig:
                 buf = StringIO.StringIO()
                 gz = gzip.GzipFile(str(filename), 'wb', fileobj=buf)
                 gz.write(payload)
                 gz.close()
                 payload = buf.getvalue()
             htmlFile.content = payload
             htmlFile.compressed = tooBig

             htmlFile.put()

         self.redirect('/')


 class TopHandler(webapp.RequestHandler):
     """Landing page listing the most recently uploaded HTML files."""

     @login_required
     def get(self):
         """Renders top.html with the 30 newest HtmlFile entities.

         Users without a UserData record are redirected to /update/user.
         """
         userId = users.get_current_user().user_id()
         if UserData.get_by_key_name(userId) is None:
             self.redirect('/update/user')
             return

         newest = HtmlFile.all().order('-date').fetch(limit=30)
         page = template.render('top.html', {'files': newest})
         self.response.out.write(page)


 class MainHandler(webapp.RequestHandler):
     """Serves the built-in pages and stored HtmlFile entities by name."""

     @login_required
     def get(self, name):
         """Dispatches 'dev', 'login' and 'upload'; otherwise serves an HtmlFile.

         Users without a UserData record get the login page.  An unknown name
         yields a 404.  Compressed files are gunzipped before being written.
         """
         builtinPages = {
             'dev': self.handleDev,
             'login': self.handleLogin,
             'upload': self.handleUpload,
         }
         if name in builtinPages:
             return builtinPages[name]()

         userId = users.get_current_user().user_id()
         if UserData.get_by_key_name(userId) is None:
             return self.handleLogin()

         page = HtmlFile.get_by_key_name(name)
         if page is None:
             self.error(404)
             return

         self.response.headers['Content-Type'] = 'text/html'

         if not page.compressed:
             self.response.out.write(page.content)
             return

         # TODO(jimhug): This slightly sucks ;-)
         # Can we write directly to the response.out?
         packed = StringIO.StringIO(page.content)
         gz = gzip.GzipFile(name, 'rb', fileobj=packed)
         self.response.out.write(gz.read())
         gz.close()

         # TODO(jimhug): Include first data packet with html.

     def handleLogin(self):
         """Starts the OAuth2 flow and renders login.html with the authorize URL.

         The pickled flow is kept in memcache under the user's id so that
         OAuthHandler can finish the exchange.
         """
         user = users.get_current_user()
         # TODO(jimhug): Manage secrets for dart.googleplex.com better.
         # TODO(jimhug): Confirm that we need client_secret.
         # NOTE(review): the client secret is hard-coded in source; it should
         # be loaded from configuration instead.
         flow = OAuth2WebServerFlow(
             client_id='267793340506.apps.googleusercontent.com',
             client_secret='5m8H-zyamfTYg5vnpYu1uGMU',
             scope=READER_API,
             user_agent='swarm')

         callbackUrl = self.request.relative_url('/oauth2callback')
         authorize_url = flow.step1_get_authorize_url(callbackUrl)

         memcache.set(user.user_id(), pickle.dumps(flow))

         self.response.out.write(
             template.render('login.html', {'authorize': authorize_url}))

     def handleDev(self):
         """Renders dev.html for the current user."""
         context = {'user': users.get_current_user()}
         self.response.out.write(template.render('dev.html', context))

     def handleUpload(self):
         """Renders upload.html for the current user."""
         context = {'user': users.get_current_user()}
         self.response.out.write(template.render('upload.html', context))


 class UploadFeed(webapp.RequestHandler):
     """Imports uploaded JSON feed files into an existing section."""

     def post(self):
         """Adds every uploaded 'files' field as a feed, then redirects to '/'.

         The upload's filename is used as the feed id and its JSON body must
         carry a 'section' entry naming the target section (matched against
         Section.fixedTitle).  The feed is moved to the front of that section
         and its articles are collected; uploads naming an unknown section
         are logged and skipped.
         """
         upload_files = self.request.POST.multi.__dict__['_items']
         logging.info('files: %r' % upload_files)
         for item in upload_files:
             if item[0] != 'files':
                 continue
             upload = item[1]
             feedId = upload.filename
             logging.info('upload feed: %r' % feedId)

             # Kept in its own name so the loop variable is not rebound.
             feedData = json.loads(upload.value)
             feed = Feed.get_or_insert(feedId)

             # Find the section to add it to.
             sectionTitle = feedData['section']
             section = findSectionByTitle(sectionTitle)
             if section is None:
                 logging.error('Could not find section %s to add the feed to' %
                               sectionTitle)
                 continue

             feedKey = feed.key()
             if feedKey in section.feeds:
                 logging.warning('Already contains feed %s, replacing' % feedId)
                 section.feeds.remove(feedKey)

             # Add the feed to the front of the section.
             section.feeds.insert(0, feedKey)
             section.put()

             # Add the articles.
             collectFeed(feed, feedData)

         self.redirect('/')


 # TODO(jimhug): Batch these up and request them more aggressively.
 class DataHandler(webapp.RequestHandler):
     """Serves thumbnails, article bodies and encoded user data by name."""

     def get(self, name):
         """Dispatches on name.

         '<key>.jpg' serves an article thumbnail, '<key>.html' an article body,
         'user.data' the encoded user data, 'CannedData.dart' and
         'CannedData.zip' the canned exports.  Anything else is a 404, as is a
         thumbnail request for an unknown article or one without a thumbnail.
         """
         if name.endswith('.jpg'):
             # Must be a thumbnail
             key = urllib2.unquote(name[:-len('.jpg')])
             article = Article.get_by_key_name(key)
             if article is None:
                 self.error(404)
                 return
             article.ensureThumbnail()
             if not article.thumbnail:
                 # No image could be produced; do not send 'None' as JPEG data.
                 self.error(404)
                 return
             self.response.headers['Content-Type'] = 'image/jpeg'
             # cache images for 10 hours
             self.response.headers['Cache-Control'] = 'public,max-age=36000'
             self.response.out.write(article.thumbnail)
         elif name.endswith('.html'):
             # Must be article content
             key = urllib2.unquote(name[:-len('.html')])
             article = Article.get_by_key_name(key)
             self.response.headers['Content-Type'] = 'text/html'
             if article is None:
                 content = '<h2>Missing article</h2>'
             else:
                 content = article.content
             # cache article content for 10 hours
             self.response.headers['Cache-Control'] = 'public,max-age=36000'
             self.response.out.write(content)
         elif name == 'user.data':
             self.response.out.write(self.getUserData())
         elif name == 'CannedData.dart':
             self.canData()
         elif name == 'CannedData.zip':
             self.canDataZip()
         else:
             self.error(404)

     def getUserData(self, articleKeys=None):
         """Returns the current user's encoded data, consulting memcache first.

         articleKeys, when given, is filled by UserData.getEncodedData; it is
         left untouched when the data comes from memcache.
         """
         user = users.get_current_user()
         user_id = user.user_id()

         key = 'data_' + user_id
         # need to flush memcache fairly frequently...
         data = memcache.get(key)
         if data is None:
             # TODO(jimhug): Graceful failure for unknown users.
             prefs = UserData.get_or_insert(user_id)
             data = prefs.getEncodedData(articleKeys)
             # TODO(jimhug): memcache.set(key, data)

         return data

     def canData(self):
         """Writes a Dart source file embedding the user data and article bodies."""

         def makeDartSafe(data):
             # repr() of a unicode string starts with 'u'; drop it and escape
             # '$' in the resulting literal.
             return repr(unicode(data))[1:].replace('$', '\\$')

         lines = [
             '// TODO(jimhug): Work out correct copyright for this file.',
             'class CannedData {'
         ]

         user = users.get_current_user()
         prefs = UserData.get_by_key_name(user.user_id())
         articleKeys = []
         data = prefs.getEncodedData(articleKeys)
         lines.append('  static const Map<String,String> data = const {')
         for article in db.get(articleKeys):
             key = makeDartSafe(urllib.quote(article.key().name()) + '.html')
             lines.append('    %s:%s, ' % (key, makeDartSafe(article.content)))

         lines.append('    "user.data":%s' % makeDartSafe(data))

         lines.append('  };')

         lines.append('}')
         self.response.headers['Content-Type'] = 'application/dart'
         self.response.out.write('\n'.join(lines))

     # Get canned static data
     def canDataZip(self):
         """Writes a zip with user.data plus every article's html and thumbnail."""
         # We need to zip into an in-memory buffer to get the right string encoding
         # behavior.
         data = StringIO.StringIO()
         result = zipfile.ZipFile(data, 'w')

         articleKeys = []
         result.writestr('data/user.data',
                         self.getUserData(articleKeys).encode('utf-8'))
         logging.info('  adding articles %s' % len(articleKeys))
         for article in db.get(articleKeys):
             article.ensureThumbnail()
             path = 'data/' + article.key().name() + '.html'
             result.writestr(
                 path.encode('utf-8'), article.content.encode('utf-8'))
             if article.thumbnail:
                 path = 'data/' + article.key().name() + '.jpg'
                 result.writestr(path.encode('utf-8'), article.thumbnail)

         result.close()
         logging.info('writing CannedData.zip')
         self.response.headers['Content-Type'] = 'multipart/x-zip'
         disposition = 'attachment; filename=CannedData.zip'
         self.response.headers['Content-Disposition'] = disposition
         self.response.out.write(data.getvalue())
         data.close()


 class SetDefaultFeeds(webapp.RequestHandler):
     """Points the current user's sections at a fixed set of Section keys."""

     @login_required
     def get(self):
         """Overwrites the user's section list with the defaults; redirects to '/'."""
         userId = users.get_current_user().user_id()
         prefs = UserData.get_or_insert(userId)

         keyPrefix = 'user/17857667084667353155/label/'
         defaultLabels = ('Top', 'Design', 'Eco', 'Geek', 'Google', 'Seattle',
                          'Tech', 'Web')
         prefs.sections = [
             db.Key.from_path('Section', keyPrefix + label)
             for label in defaultLabels
         ]
         prefs.put()

         self.redirect('/')


 class SetTestFeeds(webapp.RequestHandler):
     """Creates a small synthetic set of sections, feeds and articles."""

     @login_required
     def get(self):
         """Builds 3 sections x 4 feeds x 8 articles and assigns them to the user.

         Articles are only filled in while their title is still unset, and are
         saved once populated.  Redirects to '/' when done.
         """
         user = users.get_current_user()
         prefs = UserData.get_or_insert(user.user_id())

         sections = []
         for i in range(3):
             s1 = Section.get_or_insert('Test%d' % i)
             s1.title = 'Section %d' % (i + 1)

             feeds = []
             for j in range(4):
                 label = '%d_%d' % (i, j)
                 f1 = Feed.get_or_insert('Test%s' % label)
                 f1.title = 'Feed %s' % label
                 f1.iconUrl = getFeedIcon('http://google.com')
                 f1.lastUpdated = 0
                 f1.put()
                 feeds.append(f1.key())

                 for k in range(8):
                     label = '%d_%d_%d' % (i, j, k)
                     a1 = Article.get_or_insert('Test%s' % label)
                     if a1.title is None:
                         a1.feed = f1
                         a1.title = 'Article %s' % label
                         a1.author = 'anon'
                         a1.content = 'Lorem ipsum something or other...'
                         a1.snippet = 'Lorem ipsum something or other...'
                         a1.thumbnail = None
                         a1.srcurl = ''
                         a1.date = 0
                         # Without this the populated fields are never stored
                         # and the article never becomes linked to its feed.
                         a1.put()

             s1.feeds = feeds
             s1.put()
             sections.append(s1.key())

         prefs.sections = sections
         prefs.put()

         self.redirect('/')


 class UserLoginHandler(webapp.RequestHandler):
     """Imports the user's Reader subscriptions into sections and feeds."""

     @login_required
     def get(self):
         """Fetches the subscription list with the stored credentials.

         Redirects to '/' after importing, or to '/login' when the user has no
         credentials yet.
         """
         user = users.get_current_user()
         prefs = UserData.get_or_insert(user.user_id())
         if prefs.credentials:
             http = prefs.credentials.authorize(httplib2.Http())

             response, content = http.request(
                 '%s/subscription/list?output=json' % READER_API)
             self.collectFeeds(prefs, content)
             self.redirect('/')
         else:
             self.redirect('/login')

     def collectFeeds(self, prefs, content):
         """Groups subscriptions by their first category and saves the result.

         content is the JSON text of the subscription list.  Each categorized
         subscription gets a Feed entity, is added to the section of its first
         category, and has an update task queued.  Subscriptions without any
         category cannot be placed in a section and are skipped with a
         warning.  prefs.sections ends up sorted by title with 'Top' first.
         """
         data = json.loads(content)

         queue_name = self.request.get('queue_name', 'priority-queue')
         sections = {}
         for feedData in data['subscriptions']:
             categories = feedData.get('categories')
             if not categories:
                 # Indexing [0] on an empty list would abort the whole import.
                 logging.warning('skipping feed without a category: %r' %
                                 feedData.get('id'))
                 continue

             feed = Feed.get_or_insert(feedData['id'])
             feed.put()
             category = categories[0]
             categoryId = category['id']
             if categoryId not in sections:
                 sections[categoryId] = (category['label'], [])

             # TODO(jimhug): Use Reader preferences to sort feeds in a section.
             sections[categoryId][1].append(feed.key())

             # Kick off a high priority feed update
             taskqueue.add(
                 url='/update/feed',
                 queue_name=queue_name,
                 params={'id': feed.key().name()})

         sectionKeys = []
         for name, (title, feeds) in sections.items():
             section = Section.get_or_insert(name)
             section.feeds = feeds
             section.title = title
             section.put()
             # Forces Top to be the first section
             if title == 'Top':
                 title = '0Top'
             sectionKeys.append((title, section.key()))

         # TODO(jimhug): Use Reader preferences API to get users true sort order.
         prefs.sections = [key for t, key in sorted(sectionKeys)]
         prefs.put()


 class AllFeedsCollector(webapp.RequestHandler):
     '''Queues an /update/feed task for every Feed in the datastore.'''

     def post(self):
         '''Handles POST exactly like GET.'''
         return self.get()

     def get(self):
         '''Adds one update task per feed to the requested queue.

         The queue is taken from the 'queue_name' request value and defaults
         to 'background'.
         '''
         targetQueue = self.request.get('queue_name', 'background')
         for entity in Feed.all():
             taskParams = {'id': entity.key().name()}
             taskqueue.add(
                 url='/update/feed', queue_name=targetQueue, params=taskParams)


 # Article counts requested by FeedCollector.get: INITIAL_COUNT when the feed's
 # lastUpdated is still None, UPDATE_COUNT otherwise.
 UPDATE_COUNT = 4  # The number of articles to request on periodic updates.
 INITIAL_COUNT = 40  # The number of articles to get first for a new queue.
 # Applied by collectArticle to the tag-stripped article content.
 SNIPPET_SIZE = 180  # The length of plain-text snippet to extract.


 class FeedCollector(webapp.RequestHandler):
     """Fetches one feed from the Reader API and stores its articles."""

     def post(self):
         """Handles POST exactly like GET."""
         return self.get()

     def get(self):
         """Updates the feed named by the 'id' request value.

         A feed that has never been updated is fetched with INITIAL_COUNT
         articles, any other feed with UPDATE_COUNT.
         """
         feedId = self.request.get('id')
         feed = Feed.get_or_insert(feedId)

         neverUpdated = feed.lastUpdated is None
         howMany = INITIAL_COUNT if neverUpdated else UPDATE_COUNT
         self.fetchn(feed, feedId, howMany)

         self.response.headers['Content-Type'] = "text/plain"

     def fetchn(self, feed, feedId, n, continuation=None):
         """Requests n articles of the feed and hands the result to collectFeed.

         continuation, when given, is sent as the 'c' query parameter.  A 401
         response body is echoed inside <pre>; any other non-200 status code
         is written to the response as is.
         """
         # Articles are read n at a time until an already stored one is hit.
         if continuation is not None:
             apiUrl = '%s/stream/contents/%s?n=%d&c=%s' % (READER_API, feedId, n,
                                                           continuation)
         else:
             apiUrl = '%s/stream/contents/%s?n=%d' % (READER_API, feedId, n)

         logging.info('fetching: %s' % apiUrl)
         result = urlfetch.fetch(apiUrl)
         status = result.status_code

         if status == 200:
             collectFeed(feed, json.loads(result.content), continuation)
         elif status == 401:
             self.response.out.write('<pre>%s</pre>' % result.content)
         else:
             self.response.out.write(status)


 def findSectionByTitle(title):
     """Returns the first Section whose fixedTitle() equals title, else None."""
     candidates = (s for s in Section.all() if s.fixedTitle() == title)
     return next(candidates, None)


 def collectFeed(feed, data, continuation=None):
     '''
     Copies a feed from the given JSON object into the given feed entity and
     stores its articles.

     On the first page (no continuation) the icon URL, title and update time
     are refreshed.  Returns False, after saving the feed, as soon as
     collectArticle reports an already stored article; otherwise the cached
     encoding is rebuilt and True is returned.
     '''
     firstPage = continuation is None
     if firstPage:
         if 'alternate' in data:
             feed.iconUrl = getFeedIcon(data['alternate'][0]['href'])
         feed.title = data['title']
         feed.lastUpdated = data['updated']

     entries = data['items']
     logging.info('%d new articles for %s' % (len(entries), feed.title))

     for entry in entries:
         stored = collectArticle(feed, entry)
         if stored:
             continue
         feed.put()
         return False

     if entries and 'continuation' in data:
         # TODO(jimhug): Enable this continuation check when more robust
         logging.info('would have looked for more articles')

     feed.ensureEncodedFeed(force=True)
     feed.put()
     return True


 def collectArticle(feed, data):
     '''
     Reads an article from the given JSON object and stores it in the datastore.

     Returns True when the article was stored or skipped for lacking a title,
     and False when an article with the same id and publication date already
     exists (the caller stops collecting at that point).
     '''
     if 'title' not in data:
         # Skip articles without titles
         return True

     articleId = data['id']
     article = Article.get_or_insert(articleId)
     # TODO(jimhug): This aborts too early - at lease for one adafruit case.
     if article.date == data['published']:
         logging.info(
             'found existing, aborting: %r, %r' % (articleId, article.date))
         return False

     if 'content' in data:
         content = data['content']['content']
     elif 'summary' in data:
         content = data['summary']['content']
     else:
         content = ''
     #TODO(jimhug): better summary?
     article.content = content
     article.date = data['published']
     article.title = unescape(data['title'])
     article.snippet = unescape(strip_tags(content)[:SNIPPET_SIZE])

     article.feed = feed

     # TODO(jimhug): make this canonical so UX can change for this state
     article.author = data.get('author', 'anonymous')

     # The last alternate link that carries an href wins.
     article.srcurl = ''
     for alt in data.get('alternate', []):
         if 'href' in alt:
             article.srcurl = alt['href']

     article.ensureThumbnail()

     # ensureThumbnail only saves when it regenerates the thumbnail, so save
     # explicitly; otherwise updates to an existing article are lost.
     article.put()
     return True


 def unescape(html):
     "Inverse of Django's utils.html.escape function"
     text = html if isinstance(html, basestring) else str(html)
     # '&amp;' is decoded last so that '&amp;lt;' becomes '&lt;', not '<'.
     replacements = (
         ('&#39;', "'"),
         ('&quot;', '"'),
         ('&gt;', '>'),
         ('&lt;', '<'),
         ('&amp;', '&'),
     )
     for entity, char in replacements:
         text = text.replace(entity, char)
     return text


 def getFeedIcon(url):
     """Returns the favicon service URL for the host (netloc) of url."""
     host = urlparse.urlparse(url).netloc
     return ('http://s2.googleusercontent.com/s2/favicons?domain=%s&alt=feed'
             % host)


 def findImage(text):
     """Returns the best thumbnail source URL found in text, or None.

     Preference order: a jpg/jpeg/png image, then a video still, then a gif.
     """
     preferred = (
         lambda: findImgTag(text, 'jpg|jpeg|png'),
         lambda: findVideoTag(text),
     )
     # Finders are called lazily so later ones only run when needed.
     for finder in preferred:
         found = finder()
         if found is not None:
             return found
     return findImgTag(text, 'gif')


 def findImgTag(text, extensions):
     """Returns the first src URL in text ending in one of the extensions.

     extensions is a regex alternation such as 'jpg|jpeg|png'.  Both http and
     https URLs are accepted; an optional query string may follow the
     extension and is not part of the result.  Returns None when nothing
     matches.
     """
     # [^\s"] keeps the URL inside its own quotes; \S would let the match run
     # on into a following attribute when no whitespace separates them.
     pattern = r'src="(https?://[^\s"]+\.(?:%s))(?:\?[^"]*)?"' % extensions
     m = re.search(pattern, text)
     if m is None:
         return None
     return m.group(1)


 def findVideoTag(text):
     """Returns the still-image URL for the first YouTube src in text, or None.

     The video id is taken from the second path segment of the URL (as in
     /v/<id> or /embed/<id>), without any query string, fragment or trailing
     slash.
     """
     # TODO(jimhug): Add other videos beyond youtube.
     m = re.search(
         r'src="https?://www\.youtube\.com/([^\s/"]+)/([^\s/"?&#]+)', text)
     if m is None:
         return None

     return 'http://img.youtube.com/vi/%s/0.jpg' % m.group(2)


 def makeThumbnail(text):
     """Returns thumbnail image data for the first usable image in text.

     Best effort: returns None when text has no image or when fetching or
     transforming the image fails.
     """
     url = None
     try:
         url = findImage(text)
         if url is None:
             return None
         return generateThumbnail(url)
     except Exception:
         # Only ordinary errors are swallowed; BaseException-derived signals
         # (for example the request deadline) must reach the caller.
         logging.exception('error decoding: %s' % (url or text))
         return None


 def generateThumbnail(url):
     """Fetches the image at url and returns a THUMB_SIZE JPEG of it.

     The image is first cropped to the thumbnail's aspect ratio (evenly from
     both sides when too wide, from the bottom when too tall), then resized.
     """
     logging.info('generating thumbnail: %s' % url)
     targetW, targetH = THUMB_SIZE

     fetched = urlfetch.fetch(url)
     picture = images.Image(fetched.content)

     srcW = picture.width
     srcH = picture.height

     srcRatio = float(srcW) / srcH
     targetRatio = float(targetW) / targetH

     if srcRatio > targetRatio:
         # Too wide, so crop on the sides.
         trim = (srcW - srcH * targetRatio) / (2.0 * srcW)
         picture.crop(trim, 0., 1. - trim, 1.)
     elif srcRatio < targetRatio:
         # Too tall, so crop out the bottom.
         trim = (srcH - srcW / targetRatio) / srcH
         picture.crop(0., 0., 1., 1. - trim)

     picture.resize(targetW, targetH)

     # Chose JPEG encoding because informal experiments showed it generated
     # the best size to quality ratio for thumbnail images.
     encoded = picture.execute_transforms(output_encoding=images.JPEG)
     logging.info('  finished thumbnail: %s' % url)

     return encoded


 class OAuthHandler(webapp.RequestHandler):
     """OAuth2 callback: exchanges the returned code for credentials."""

     @login_required
     def get(self):
         """Completes the flow started by MainHandler.handleLogin.

         The pickled flow is read back from memcache under the user's id.  If
         it is no longer there the user is sent to /login to start over;
         otherwise the credentials are stored and the user is redirected to
         /update/user.
         """
         user = users.get_current_user()
         pickledFlow = memcache.get(user.user_id())
         if not pickledFlow:
             # A cache miss yields None, which pickle.loads cannot handle.
             self.redirect('/login')
             return

         # NOTE(review): the unpickled bytes come from this app's own memcache
         # entry written by handleLogin, not from the request.
         flow = pickle.loads(pickledFlow)
         prefs = UserData.get_or_insert(user.user_id())
         prefs.credentials = flow.step2_exchange(self.request.params)
         prefs.put()
         self.redirect('/update/user')


 def main():
     """Builds the URL routing table and runs the WSGI application."""
     routes = [
         ('/data/(.*)', DataHandler),

         # This is called periodically from cron.yaml.
         ('/update/allFeeds', AllFeedsCollector),
         ('/update/feed', FeedCollector),
         ('/update/user', UserLoginHandler),
         ('/update/defaultFeeds', SetDefaultFeeds),
         ('/update/testFeeds', SetTestFeeds),
         ('/update/html', UpdateHtml),
         ('/update/upload', UploadFeed),
         ('/oauth2callback', OAuthHandler),
         ('/', TopHandler),
         # Catch-all; must stay last.
         ('/(.*)', MainHandler),
     ]
     application = webapp.WSGIApplication(routes, debug=True)
     webapp.util.run_wsgi_app(application)


 # Run the application only when this file is executed as a script.
 if __name__ == '__main__':
     main()
	# Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
	# for details. All rights reserved. Use of this source code is governed by a
	# BSD-style license that can be found in the LICENSE file.

	#!/usr/bin/env python3
	#
	import re, base64, logging, pickle, httplib2, time, urlparse, urllib2, urllib, StringIO, gzip, zipfile

	from google.appengine.ext import webapp, db

	from google.appengine.api import taskqueue, urlfetch, memcache, images, users
	from google.appengine.ext.webapp.util import login_required
	from google.appengine.ext.webapp import template

	from django.utils import simplejson as json
	from django.utils.html import strip_tags

	from oauth2client.appengine import CredentialsProperty
	from oauth2client.client import OAuth2WebServerFlow

	import encoder

	# TODO(jimhug): Allow client to request desired thumb size.
	# Thumbnail size; Article.ensureThumbnail stores str(THUMB_SIZE) so that a
	# changed size triggers regeneration.
	THUMB_SIZE = (57, 57)
	# Base URL of the Google Reader API; also passed as the OAuth scope.
	READER_API = 'http://www.google.com/reader/api/0'

	# Upper bound on the number of sections UserData.getEncodedData encodes.
	MAX_SECTIONS = 5
	# Upper bound on the number of articles Feed.ensureEncodedFeed encodes.
	MAX_ARTICLES = 20


	class UserData(db.Model):
		"""Per-user record: OAuth credentials plus an ordered list of section keys."""
		credentials = CredentialsProperty()
		sections = db.ListProperty(db.Key)

		def getEncodedData(self, articleKeys=None):
			"""Encodes at most MAX_SECTIONS sections and returns the raw result.

			articleKeys, when given, is passed on to Section.encode.
			"""
			enc = encoder.Encoder()
			# TODO(jimhug): Only return initially visible section in first reply.
			maxSections = min(MAX_SECTIONS, len(self.sections))
			enc.writeInt(maxSections)
			for section in db.get(self.sections[:maxSections]):
				section.encode(enc, articleKeys)
			return enc.getRaw()


	class Section(db.Model):
		"""A titled, ordered collection of feed keys."""
		title = db.TextProperty()
		feeds = db.ListProperty(db.Key)

		def fixedTitle(self):
			"""Returns the portion of the title before the first underscore."""
			return self.title.split('_')[0]

		def encode(self, enc, articleKeys=None):
			"""Writes this section and each feed's cached encoding to enc.

			When articleKeys is not None it is extended with every feed's
			topArticles.
			"""
			# TODO(jimhug): Need to optimize format and support incremental updates.
			enc.writeString(self.key().name())
			enc.writeString(self.fixedTitle())
			enc.writeInt(len(self.feeds))
			for feed in db.get(self.feeds):
				feed.ensureEncodedFeed()
				enc.writeRaw(feed.encodedFeed3)
				if articleKeys is not None:
					articleKeys.extend(feed.topArticles)


	class Feed(db.Model):
		"""A feed plus a cached encoding of its most recent articles."""
		title = db.TextProperty()
		iconUrl = db.TextProperty()
		lastUpdated = db.IntegerProperty()

		encodedFeed3 = db.TextProperty()
		topArticles = db.ListProperty(db.Key)

		def ensureEncodedFeed(self, force=False):
			"""Builds and saves the cached encoding if missing, or always if force."""
			if force or self.encodedFeed3 is None:
				enc = encoder.Encoder()
				articleSet = []
				self.encode(enc, MAX_ARTICLES, articleSet)
				logging.info('articleSet length is %s' % len(articleSet))
				self.topArticles = articleSet
				self.encodedFeed3 = enc.getRaw()
				self.put()

		def encode(self, enc, maxArticles, articleSet):
			"""Writes the feed header and up to maxArticles article headers to enc.

			Articles are taken newest first; every encoded article's key is
			appended to articleSet.
			"""
			enc.writeString(self.key().name())
			enc.writeString(self.title)
			enc.writeString(self.iconUrl)

			logging.info('encoding feed: %s' % self.title)
			encodedArts = []

			for article in self.article_set.order('-date').fetch(limit=maxArticles):
				encodedArts.append(article.encodeHeader())
				articleSet.append(article.key())

			enc.writeInt(len(encodedArts))
			enc.writeRaw(''.join(encodedArts))


	class Article(db.Model):
		"""One feed entry, including its content and a cached thumbnail."""
		feed = db.ReferenceProperty(Feed)

		title = db.TextProperty()
		author = db.TextProperty()
		content = db.TextProperty()
		snippet = db.TextProperty()
		thumbnail = db.BlobProperty()
		thumbnailSize = db.TextProperty()
		srcurl = db.TextProperty()
		date = db.IntegerProperty()

		def ensureThumbnail(self):
			"""Regenerates and saves the thumbnail if it was built for another size."""
			# If our desired thumbnail size has changed, regenerate it and cache.
			if self.thumbnailSize != str(THUMB_SIZE):
				self.thumbnail = makeThumbnail(self.content)
				self.thumbnailSize = str(THUMB_SIZE)
				self.put()

		def encodeHeader(self):
			"""Returns the raw encoding of the article's summary fields."""
			# TODO(jmesserly): for now always unescape until the crawler catches up
			enc = encoder.Encoder()
			enc.writeString(self.key().name())
			enc.writeString(unescape(self.title))
			enc.writeString(self.srcurl)
			enc.writeBool(self.thumbnail is not None)
			enc.writeString(self.author)
			enc.writeInt(self.date)
			enc.writeString(unescape(self.snippet))
			return enc.getRaw()


	class HtmlFile(db.Model):
		"""An uploaded HTML file, stored by UpdateHtml and served by MainHandler."""
		# File bytes; gzip-compressed when `compressed` is True.
		content = db.BlobProperty()
		compressed = db.BooleanProperty()
		filename = db.StringProperty()
		author = db.UserProperty(auto_current_user=True)
		date = db.DateTimeProperty(auto_now_add=True)


	class UpdateHtml(webapp.RequestHandler):
		"""Stores uploaded files as HtmlFile entities."""

		def post(self):
			"""Saves every uploaded 'files' field, then redirects to '/'.

			Payloads longer than 1MB - 1KB are gzipped before being stored.
			"""
			upload_files = self.request.POST.multi.__dict__['_items']
			version = self.request.get('version')
			logging.info('files: %r' % upload_files)
			for data in upload_files:
				if data[0] != 'files': continue
				file = data[1]
				filename = file.filename
				if version:
					filename = '%s-%s' % (version, filename)
				logging.info('upload: %r' % filename)

				htmlFile = HtmlFile.get_or_insert(filename)
				htmlFile.filename = filename

				# If text > (1MB - 1KB) then gzip text to fit in 1MB space
				text = file.value
				if len(text) > 1024 * 1023:
					data = StringIO.StringIO()
					gz = gzip.GzipFile(str(filename), 'wb', fileobj=data)
					gz.write(text)
					gz.close()
					htmlFile.content = data.getvalue()
					htmlFile.compressed = True
				else:
					htmlFile.content = text
					htmlFile.compressed = False

				htmlFile.put()

			self.redirect('/')


	class TopHandler(webapp.RequestHandler):
		"""Landing page listing the most recently uploaded HTML files."""

		@login_required
		def get(self):
			"""Renders top.html with the 30 newest HtmlFile entities.

			Users without a UserData record are redirected to /update/user.
			"""
			user = users.get_current_user()
			prefs = UserData.get_by_key_name(user.user_id())
			if prefs is None:
				self.redirect('/update/user')
				return

			params = {'files': HtmlFile.all().order('-date').fetch(limit=30)}
			self.response.out.write(template.render('top.html', params))


	class MainHandler(webapp.RequestHandler):
		"""Serves the built-in pages and stored HtmlFile entities by name."""

		@login_required
		def get(self, name):
			"""Dispatches 'dev', 'login' and 'upload'; otherwise serves an HtmlFile.

			Users without a UserData record get the login page; an unknown
			name yields a 404.
			"""
			if name == 'dev':
				return self.handleDev()

			elif name == 'login':
				return self.handleLogin()

			elif name == 'upload':
				return self.handleUpload()

			user = users.get_current_user()
			prefs = UserData.get_by_key_name(user.user_id())
			if prefs is None:
				return self.handleLogin()

			html = HtmlFile.get_by_key_name(name)
			if html is None:
				self.error(404)
				return

			self.response.headers['Content-Type'] = 'text/html'

			if html.compressed:
				# TODO(jimhug): This slightly sucks ;-)
				# Can we write directly to the response.out?
				gz = gzip.GzipFile(
					name, 'rb', fileobj=StringIO.StringIO(html.content))
				self.response.out.write(gz.read())
				gz.close()
			else:
				self.response.out.write(html.content)

			# TODO(jimhug): Include first data packet with html.

		def handleLogin(self):
			"""Starts the OAuth2 flow and renders login.html with the authorize URL.

			The pickled flow is kept in memcache under the user's id.
			"""
			user = users.get_current_user()
			# TODO(jimhug): Manage secrets for dart.googleplex.com better.
			# TODO(jimhug): Confirm that we need client_secret.
			flow = OAuth2WebServerFlow(
				client_id='267793340506.apps.googleusercontent.com',
				client_secret='5m8H-zyamfTYg5vnpYu1uGMU',
				scope=READER_API,
				user_agent='swarm')

			callback = self.request.relative_url('/oauth2callback')
			authorize_url = flow.step1_get_authorize_url(callback)

			memcache.set(user.user_id(), pickle.dumps(flow))

			content = template.render('login.html', {'authorize': authorize_url})
			self.response.out.write(content)

		def handleDev(self):
			"""Renders dev.html for the current user."""
			user = users.get_current_user()
			content = template.render('dev.html', {'user': user})
			self.response.out.write(content)

		def handleUpload(self):
			"""Renders upload.html for the current user."""
			user = users.get_current_user()
			content = template.render('upload.html', {'user': user})
			self.response.out.write(content)


	class UploadFeed(webapp.RequestHandler):
	    '''Accepts uploaded feed JSON files and files each feed under a section.'''

	    def post(self):
	        '''Imports every posted 'files' part as a feed, then redirects to '/'.

	        The upload's filename is used as the Feed key name and its JSON body
	        must carry a 'section' title. The feed is moved to the front of the
	        matching section and its articles are collected; uploads naming an
	        unknown section are logged and skipped.
	        '''
	        # The raw (field name, value) pairs are read straight out of the
	        # multidict's internals.
	        uploads = self.request.POST.multi.__dict__['_items']
	        logging.info('files: %r' % uploads)
	        for item in uploads:
	            if item[0] != 'files':
	                continue
	            upload = item[1]
	            logging.info('upload feed: %r' % upload.filename)

	            feedJson = json.loads(upload.value)

	            feedId = upload.filename
	            feed = Feed.get_or_insert(feedId)

	            # Find the section to add it to.
	            sectionTitle = feedJson['section']
	            section = findSectionByTitle(sectionTitle)
	            if section is None:
	                logging.error('Could not find section %s to add the feed to' %
	                              sectionTitle)
	                continue

	            feedKey = feed.key()
	            if feedKey in section.feeds:
	                logging.warning(
	                    'Already contains feed %s, replacing' % feedId)
	                section.feeds.remove(feedKey)

	            # Add the feed to the section.
	            section.feeds.insert(0, feedKey)
	            section.put()

	            # Add the articles.
	            collectFeed(feed, feedJson)

	        self.redirect('/')


	# TODO(jimhug): Batch these up and request them more aggressively.
	class DataHandler(webapp.RequestHandler):
	    '''Serves thumbnails, article bodies, user data and canned data dumps.'''

	    def get(self, name):
	        '''Dispatches on the requested resource name.

	        '<key>.jpg' returns the article's thumbnail, '<key>.html' its content,
	        'user.data' the encoded user data, and 'CannedData.dart' /
	        'CannedData.zip' the canned dumps. Anything else, and a thumbnail
	        request for an unknown article, answers 404.
	        '''
	        if name.endswith('.jpg'):
	            # Must be a thumbnail
	            key = urllib2.unquote(name[:-len('.jpg')])
	            article = Article.get_by_key_name(key)
	            if article is None:
	                # Unknown key: answer 404 instead of failing on None below.
	                self.error(404)
	                return
	            self.response.headers['Content-Type'] = 'image/jpeg'
	            # cache images for 10 hours
	            self.response.headers['Cache-Control'] = 'public,max-age=36000'
	            article.ensureThumbnail()
	            self.response.out.write(article.thumbnail)
	        elif name.endswith('.html'):
	            # Must be article content
	            key = urllib2.unquote(name[:-len('.html')])
	            article = Article.get_by_key_name(key)
	            self.response.headers['Content-Type'] = 'text/html'
	            if article is None:
	                content = '<h2>Missing article</h2>'
	            else:
	                content = article.content
	            # cache article content for 10 hours
	            self.response.headers['Cache-Control'] = 'public,max-age=36000'
	            self.response.out.write(content)
	        elif name == 'user.data':
	            self.response.out.write(self.getUserData())
	        elif name == 'CannedData.dart':
	            self.canData()
	        elif name == 'CannedData.zip':
	            self.canDataZip()
	        else:
	            self.error(404)

	    def getUserData(self, articleKeys=None):
	        '''Returns the current user's encoded data.

	        memcache is consulted under 'data_<user id>'; on a miss the data is
	        encoded from the user's UserData entity (created if absent).
	        articleKeys is handed through to UserData.getEncodedData.
	        '''
	        # NOTE(review): get() is not decorated with login_required, so user
	        # may be None here - confirm how anonymous requests should behave.
	        user = users.get_current_user()
	        user_id = user.user_id()

	        key = 'data_' + user_id
	        # need to flush memcache fairly frequently...
	        data = memcache.get(key)
	        if data is None:
	            # TODO(jimhug): Graceful failure for unknown users.
	            prefs = UserData.get_or_insert(user_id)
	            data = prefs.getEncodedData(articleKeys)
	            # TODO(jimhug): memcache.set(key, data)

	        return data

	    def canData(self):
	        '''Writes a Dart source file embedding the user's data as a const map.'''

	        def makeDartSafe(data):
	            # repr() of a unicode value starts with 'u'; drop that prefix and
	            # escape '$' in the resulting literal.
	            return repr(unicode(data))[1:].replace('$', '\\$')

	        lines = [
	            '// TODO(jimhug): Work out correct copyright for this file.',
	            'class CannedData {'
	        ]

	        user = users.get_current_user()
	        prefs = UserData.get_by_key_name(user.user_id())
	        articleKeys = []
	        data = prefs.getEncodedData(articleKeys)
	        lines.append(' static const Map<String,String> data = const {')
	        for article in db.get(articleKeys):
	            key = makeDartSafe(urllib.quote(article.key().name()) + '.html')
	            lines.append(' %s:%s, ' % (key, makeDartSafe(article.content)))

	        lines.append(' "user.data":%s' % makeDartSafe(data))

	        lines.append(' };')

	        lines.append('}')
	        self.response.headers['Content-Type'] = 'application/dart'
	        self.response.out.write('\n'.join(lines))

	    # Get canned static data
	    def canDataZip(self):
	        '''Writes a zip attachment with the user data, articles and thumbnails.'''
	        # We need to zip into an in-memory buffer to get the right string
	        # encoding behavior.
	        data = StringIO.StringIO()
	        result = zipfile.ZipFile(data, 'w')

	        articleKeys = []
	        result.writestr('data/user.data',
	                        self.getUserData(articleKeys).encode('utf-8'))
	        logging.info(' adding articles %s' % len(articleKeys))
	        for article in db.get(articleKeys):
	            article.ensureThumbnail()
	            path = 'data/' + article.key().name() + '.html'
	            result.writestr(
	                path.encode('utf-8'), article.content.encode('utf-8'))
	            if article.thumbnail:
	                path = 'data/' + article.key().name() + '.jpg'
	                result.writestr(path.encode('utf-8'), article.thumbnail)

	        result.close()
	        logging.info('writing CannedData.zip')
	        self.response.headers['Content-Type'] = 'multipart/x-zip'
	        disposition = 'attachment; filename=CannedData.zip'
	        self.response.headers['Content-Disposition'] = disposition
	        self.response.out.write(data.getvalue())
	        data.close()


	class SetDefaultFeeds(webapp.RequestHandler):
	    '''Points the signed-in user at a fixed list of Section keys.'''

	    @login_required
	    def get(self):
	        '''Overwrites the user's sections with the defaults and redirects.'''
	        defaultSections = (
	            'user/17857667084667353155/label/Top',
	            'user/17857667084667353155/label/Design',
	            'user/17857667084667353155/label/Eco',
	            'user/17857667084667353155/label/Geek',
	            'user/17857667084667353155/label/Google',
	            'user/17857667084667353155/label/Seattle',
	            'user/17857667084667353155/label/Tech',
	            'user/17857667084667353155/label/Web',
	        )

	        account = users.get_current_user()
	        prefs = UserData.get_or_insert(account.user_id())
	        prefs.sections = [
	            db.Key.from_path('Section', keyName)
	            for keyName in defaultSections
	        ]
	        prefs.put()

	        self.redirect('/')


	class SetTestFeeds(webapp.RequestHandler):
	    '''Populates the datastore with synthetic sections, feeds and articles.'''

	    @login_required
	    def get(self):
	        '''Creates 3 sections x 4 feeds x 8 articles and assigns the sections.

	        Sections and feeds are rewritten on every call. An article is filled
	        in and stored only while its title is still unset, so repeated calls
	        leave already-initialized articles untouched.
	        '''
	        user = users.get_current_user()
	        prefs = UserData.get_or_insert(user.user_id())

	        sections = []
	        for i in range(3):
	            s1 = Section.get_or_insert('Test%d' % i)
	            s1.title = 'Section %d' % (i + 1)

	            feeds = []
	            for j in range(4):
	                label = '%d_%d' % (i, j)
	                f1 = Feed.get_or_insert('Test%s' % label)
	                f1.title = 'Feed %s' % label
	                f1.iconUrl = getFeedIcon('http://google.com')
	                f1.lastUpdated = 0
	                f1.put()
	                feeds.append(f1.key())

	                for k in range(8):
	                    label = '%d_%d_%d' % (i, j, k)
	                    a1 = Article.get_or_insert('Test%s' % label)
	                    if a1.title is None:
	                        a1.feed = f1
	                        a1.title = 'Article %s' % label
	                        a1.author = 'anon'
	                        a1.content = 'Lorem ipsum something or other...'
	                        a1.snippet = 'Lorem ipsum something or other...'
	                        a1.thumbnail = None
	                        a1.srcurl = ''
	                        a1.date = 0
	                        # Attribute assignment alone does not reach the
	                        # datastore; without this write the fields above are
	                        # discarded and the title check never becomes false.
	                        a1.put()

	            s1.feeds = feeds
	            s1.put()
	            sections.append(s1.key())

	        prefs.sections = sections
	        prefs.put()

	        self.redirect('/')


	class UserLoginHandler(webapp.RequestHandler):
	    '''Imports the signed-in user's Reader subscriptions as sections/feeds.'''

	    @login_required
	    def get(self):
	        '''Fetches the subscription list and rebuilds the user's sections.

	        Users without stored credentials are redirected to '/login'; otherwise
	        the subscription list is collected and the user is sent to '/'.
	        '''
	        account = users.get_current_user()
	        prefs = UserData.get_or_insert(account.user_id())
	        if not prefs.credentials:
	            self.redirect('/login')
	            return

	        authedHttp = prefs.credentials.authorize(httplib2.Http())
	        _, body = authedHttp.request(
	            '%s/subscription/list?output=json' % READER_API)
	        self.collectFeeds(prefs, body)
	        self.redirect('/')

	    def collectFeeds(self, prefs, content):
	        '''Groups subscribed feeds by their first category and stores them.

	        content is the JSON text of the subscription list. Each feed gets an
	        update task on the queue named by the 'queue_name' request value
	        (default 'priority-queue'). One Section per category id is written
	        and prefs.sections is set to the section keys sorted by title, with
	        'Top' forced first.
	        '''
	        subscriptions = json.loads(content)['subscriptions']
	        queueName = self.request.get('queue_name', 'priority-queue')

	        # category id -> (label, [feed keys])
	        bySection = {}
	        for subscription in subscriptions:
	            feed = Feed.get_or_insert(subscription['id'])
	            feed.put()
	            # NOTE(review): a subscription with an empty 'categories' list
	            # raises IndexError here - confirm whether that can occur.
	            category = subscription['categories'][0]
	            sectionId = category['id']
	            if sectionId not in bySection:
	                bySection[sectionId] = (category['label'], [])

	            # TODO(jimhug): Use Reader preferences to sort feeds in a section.
	            bySection[sectionId][1].append(feed.key())

	            # Kick off a high priority feed update
	            taskqueue.add(
	                url='/update/feed',
	                queue_name=queueName,
	                params={'id': feed.key().name()})

	        ordered = []
	        for sectionId, (title, feedKeys) in bySection.items():
	            section = Section.get_or_insert(sectionId)
	            section.feeds = feedKeys
	            section.title = title
	            section.put()
	            # Forces Top to be the first section
	            sortTitle = '0Top' if title == 'Top' else title
	            ordered.append((sortTitle, section.key()))
	        ordered.sort()

	        # TODO(jimhug): Use Reader preferences API to get users true sort order.
	        prefs.sections = [sectionKey for _, sectionKey in ordered]
	        prefs.put()


	class AllFeedsCollector(webapp.RequestHandler):
	    '''Queues an '/update/feed' task for every Feed in the datastore.'''

	    def post(self):
	        '''Handles POST exactly like GET.'''
	        return self.get()

	    def get(self):
	        '''Adds one update task per feed.

	        The target queue comes from the 'queue_name' request value and
	        defaults to 'background'.
	        '''
	        targetQueue = self.request.get('queue_name', 'background')
	        for feed in Feed.all():
	            taskParams = {'id': feed.key().name()}
	            taskqueue.add(
	                url='/update/feed',
	                queue_name=targetQueue,
	                params=taskParams)


	# Article count FeedCollector requests when the feed already has a
	# lastUpdated value (periodic updates).
	UPDATE_COUNT = 4
	# Article count FeedCollector requests when the feed's lastUpdated is still
	# None, i.e. the feed has not been fetched before.
	INITIAL_COUNT = 40
	# Maximum number of characters of tag-stripped content kept as an article's
	# plain-text snippet.
	SNIPPET_SIZE = 180


	class FeedCollector(webapp.RequestHandler):
	    '''Fetches recent articles for one feed from the Reader API.'''

	    def post(self):
	        '''Handles POST exactly like GET.'''
	        return self.get()

	    def get(self):
	        '''Updates the feed named by the 'id' request value.

	        INITIAL_COUNT articles are requested for a feed that has never been
	        updated, UPDATE_COUNT otherwise.
	        '''
	        feedId = self.request.get('id')
	        feed = Feed.get_or_insert(feedId)

	        if feed.lastUpdated is None:
	            count = INITIAL_COUNT
	        else:
	            count = UPDATE_COUNT
	        self.fetchn(feed, feedId, count)

	        self.response.headers['Content-Type'] = "text/plain"

	    def fetchn(self, feed, feedId, n, continuation=None):
	        '''Requests n articles of feedId and hands them to collectFeed.

	        continuation, when given, is sent as the 'c' query parameter. A 200
	        reply is parsed and collected; a 401 reply body is echoed inside
	        <pre>; any other status code is written to the response.
	        '''
	        if continuation is not None:
	            apiUrl = '%s/stream/contents/%s?n=%d&c=%s' % (
	                READER_API, feedId, n, continuation)
	        else:
	            apiUrl = '%s/stream/contents/%s?n=%d' % (READER_API, feedId, n)

	        logging.info('fetching: %s' % apiUrl)
	        reply = urlfetch.fetch(apiUrl)
	        status = reply.status_code

	        if status == 200:
	            collectFeed(feed, json.loads(reply.content), continuation)
	        elif status == 401:
	            self.response.out.write('<pre>%s</pre>' % reply.content)
	        else:
	            self.response.out.write(status)


	def findSectionByTitle(title):
	    '''Returns the first Section whose fixedTitle() equals title, else None.'''
	    # The generator is lazy, so iteration stops at the first match.
	    matching = (section for section in Section.all()
	                if section.fixedTitle() == title)
	    return next(matching, None)


	def collectFeed(feed, data, continuation=None):
	    '''
	    Copies a feed's JSON description into the given feed entity and collects
	    its articles.

	    On the first page (continuation is None) the icon URL (when the JSON has
	    an 'alternate' entry), title and last-updated value are taken from data.
	    Articles are collected in order; as soon as collectArticle returns a
	    false value the feed is saved and False is returned. Otherwise the
	    encoded feed is rebuilt, the feed is saved and True is returned.
	    '''
	    firstPage = continuation is None
	    if firstPage:
	        if 'alternate' in data:
	            feed.iconUrl = getFeedIcon(data['alternate'][0]['href'])
	        feed.title = data['title']
	        feed.lastUpdated = data['updated']

	    entries = data['items']
	    logging.info('%d new articles for %s' % (len(entries), feed.title))

	    # all() stops at the first article that reports "already known".
	    if not all(collectArticle(feed, entry) for entry in entries):
	        feed.put()
	        return False

	    if entries and 'continuation' in data:
	        logging.info('would have looked for more articles')
	        # TODO(jimhug): Enable this continuation check when more robust
	        #self.fetchn(feed, feedId, data['continuation'])

	    feed.ensureEncodedFeed(force=True)
	    feed.put()
	    return True


	def collectArticle(feed, data):
	    '''
	    Copies one article's JSON description into its Article entity.

	    Returns True when the article was skipped (no title) or filled in, and
	    False when the stored article already has the same published date, which
	    the caller treats as the signal to stop collecting.
	    '''
	    if 'title' not in data:
	        # Skip articles without titles
	        return True

	    articleId = data['id']
	    article = Article.get_or_insert(articleId)
	    published = data['published']
	    # TODO(jimhug): This aborts too early - at lease for one adafruit case.
	    if article.date == published:
	        logging.info(
	            'found existing, aborting: %r, %r' % (articleId, article.date))
	        return False

	    # Prefer the full content, fall back to the summary, else empty.
	    #TODO(jimhug): better summary?
	    body = ''
	    for field in ('content', 'summary'):
	        if field in data:
	            body = data[field]['content']
	            break

	    article.content = body
	    article.date = data['published']
	    article.title = unescape(data['title'])
	    article.snippet = unescape(strip_tags(body)[:SNIPPET_SIZE])

	    article.feed = feed

	    # TODO(jimhug): make this canonical so UX can change for this state
	    article.author = data.get('author', 'anonymous')

	    # NOTE(review): no put() is visible in this function; presumably
	    # ensureThumbnail stores the entity - confirm, and check whether srcurl
	    # (assigned afterwards) is ever saved.
	    article.ensureThumbnail()

	    # The last alternate entry carrying an 'href' wins.
	    article.srcurl = ''
	    for alt in data.get('alternate', ()):
	        if 'href' in alt:
	            article.srcurl = alt['href']
	    return True


	def unescape(html):
	    '''Inverse of Django's utils.html.escape function.

	    Non-string input is converted with str() first. The five entities that
	    escape() produces are decoded; '&amp;' is handled last so that an
	    escaped entity such as '&amp;lt;' yields the text '&lt;' rather than
	    being decoded twice.
	    '''
	    try:
	        textTypes = basestring  # Python 2: covers str and unicode
	    except NameError:
	        textTypes = str  # Python 3 has no basestring
	    if not isinstance(html, textTypes):
	        html = str(html)
	    html = html.replace('&#39;', "'").replace('&quot;', '"')
	    return html.replace('&gt;', '>').replace('&lt;', '<').replace('&amp;', '&')


	def getFeedIcon(url):
	    '''Returns the favicon service URL for the host part of url.'''
	    domain = urlparse.urlparse(url).netloc
	    iconUrl = (
	        'http://s2.googleusercontent.com/s2/favicons?domain=%s&alt=feed' %
	        domain)
	    return iconUrl


	def findImage(text):
	    '''Returns the URL of the best thumbnail source found in text, or None.

	    Preference order: a jpg/jpeg/png image, then a YouTube video still, then
	    a gif image.
	    '''
	    # Alternation in Python's re is a bare '|'; an escaped '\|' would match a
	    # literal pipe character and no real image extension.
	    img = findImgTag(text, 'jpg|jpeg|png')
	    if img is not None:
	        return img

	    img = findVideoTag(text)
	    if img is not None:
	        return img

	    return findImgTag(text, 'gif')


	def findImgTag(text, extensions):
	    '''Returns the first http src URL in text ending in one of extensions.

	    extensions is spliced into the regular expression as an alternation
	    (for example 'jpg|png'). A query string after the extension is allowed
	    but is not part of the returned URL. Returns None when nothing matches.
	    '''
	    pattern = r'src="(http://\S+\.(%s))(\?.*)?"' % extensions
	    found = re.search(pattern, text)
	    if found:
	        return found.group(1)
	    return None


	def findVideoTag(text):
	    '''Returns the still-image URL for the first YouTube src in text, or None.

	    The second path segment of the matched URL is used as the video id.
	    '''
	    # TODO(jimhug): Add other videos beyond youtube.
	    found = re.search(
	        r'src="http://www.youtube.com/(\S+)/(\S+)[/\|"]', text)
	    if found:
	        return 'http://img.youtube.com/vi/%s/0.jpg' % found.group(2)
	    return None


	def makeThumbnail(text):
	    '''Builds a thumbnail from the first usable image referenced in text.

	    Returns whatever generateThumbnail produces for that image, or None when
	    text references no image or when fetching/transforming it fails.
	    '''
	    url = None
	    try:
	        url = findImage(text)
	        if url is None:
	            return None
	        return generateThumbnail(url)
	    except Exception:
	        # Thumbnails are best effort, so ordinary failures are logged (with
	        # traceback) and turned into None. A bare except would also trap
	        # SystemExit and KeyboardInterrupt, which must propagate.
	        logging.info('error decoding: %s' % (url or text), exc_info=True)
	        return None


	def generateThumbnail(url):
	    '''Fetches the image at url and returns it as a THUMB_SIZE JPEG.

	    The image is cropped to the thumbnail's aspect ratio first (evenly from
	    both sides when too wide, from the bottom when too tall) and then
	    resized.
	    '''
	    logging.info('generating thumbnail: %s' % url)
	    thumbWidth, thumbHeight = THUMB_SIZE

	    fetched = urlfetch.fetch(url)
	    img = images.Image(fetched.content)

	    w, h = img.width, img.height
	    aspect = float(w) / h
	    thumbAspect = float(thumbWidth) / thumbHeight

	    # Crop arguments are fractions of the image: left, top, right, bottom.
	    if aspect > thumbAspect:
	        # Too wide, so crop on the sides.
	        sideMargin = (w - h * thumbAspect) / (2.0 * w)
	        img.crop(sideMargin, 0., 1. - sideMargin, 1.)
	    elif aspect < thumbAspect:
	        # Too tall, so crop out the bottom.
	        bottomExcess = (h - w / thumbAspect) / h
	        img.crop(0., 0., 1., 1. - bottomExcess)

	    img.resize(thumbWidth, thumbHeight)

	    # Chose JPEG encoding because informal experiments showed it generated
	    # the best size to quality ratio for thumbnail images.
	    encoded = img.execute_transforms(output_encoding=images.JPEG)
	    logging.info(' finished thumbnail: %s' % url)

	    return encoded


	class OAuthHandler(webapp.RequestHandler):
	    '''Completes the OAuth2 flow started by the login page.'''

	    @login_required
	    def get(self):
	        '''Exchanges the callback parameters for credentials and stores them.

	        The pickled flow is read from memcache under the user's id. When it
	        is missing (never stored, expired or evicted) the user is sent back
	        to '/login' to start over; otherwise the credentials are saved on
	        the user's UserData entity and the user is redirected to
	        '/update/user'.
	        '''
	        user = users.get_current_user()
	        # memcache.get returns None on a miss, which pickle.loads cannot
	        # take, so check before unpickling.
	        pickledFlow = memcache.get(user.user_id())
	        flow = pickle.loads(pickledFlow) if pickledFlow else None
	        if not flow:
	            self.redirect('/login')
	            return

	        prefs = UserData.get_or_insert(user.user_id())
	        prefs.credentials = flow.step2_exchange(self.request.params)
	        prefs.put()
	        self.redirect('/update/user')


	def main():
	    '''Builds the WSGI application from the URL routes and runs it.'''
	    # Order matters: the catch-all '/(.*)' route must stay last.
	    routes = [
	        ('/data/(.*)', DataHandler),

	        # This is called periodically from cron.yaml.
	        ('/update/allFeeds', AllFeedsCollector),
	        ('/update/feed', FeedCollector),
	        ('/update/user', UserLoginHandler),
	        ('/update/defaultFeeds', SetDefaultFeeds),
	        ('/update/testFeeds', SetTestFeeds),
	        ('/update/html', UpdateHtml),
	        ('/update/upload', UploadFeed),
	        ('/oauth2callback', OAuthHandler),
	        ('/', TopHandler),
	        ('/(.*)', MainHandler),
	    ]
	    # NOTE(review): debug=True is passed unconditionally - confirm that is
	    # intended outside development.
	    application = webapp.WSGIApplication(routes, debug=True)
	    webapp.util.run_wsgi_app(application)


	# Run the application when this file is executed as a script.
	if __name__ == '__main__':
	    main()