#!/usr/bin/env python3
import requests
import re
import json
import itertools
# Matches a blog URL on tumblr.com; group 1 is the blog hostname
# (e.g. 'example.tumblr.com'). The path is optional so that a URL with no
# trailing slash ('https://example.tumblr.com') is recognised as well.
TUMBLR_URL_RE = r'https?://([^.]+\.tumblr\.com)(?:/.*)?$'
# Matches an image URL served from a '<something>.media.tumblr.com' host;
# group 1 is everything after the host, which main() uses as the key for
# pairing a Danbooru source with a Tumblr photo.
IMAGE_URL_RE = r'https?://[^.]+\.media\.tumblr\.com/(.*)'
def danbooru_api_whatever(method, thing, *args, **kw):
    """Call a Danbooru JSON endpoint and return the decoded response body.

    ``method`` is a requests-style callable (such as ``requests.get`` or
    ``requests.put``) and is invoked as ``method(url, *args, **kw)``.
    ``thing`` is the resource path without the ``.json`` suffix, e.g.
    ``'posts'`` or ``'posts/123'``.

    Raises whatever ``response.raise_for_status()`` raises when the server
    answers with an error status.
    """
    # requests waits forever by default; cap the wait so a stalled
    # connection fails loudly instead of hanging the script. Callers can
    # still pass their own timeout.
    kw.setdefault('timeout', 30)
    url = 'https://danbooru.donmai.us/{}.json'.format(thing)
    response = method(url, *args, **kw)
    response.raise_for_status()
    return response.json()
def danbooru_api_get(thing, params=None):
    """GET a Danbooru resource and return the decoded JSON.

    The entries stored under ``auth_data['danbooru']`` are sent as query
    parameters; entries in ``params`` are added on top and win on a key
    clash.
    """
    # Copy first so the stored credentials are never mutated.
    query = dict(auth_data['danbooru'])
    query.update(params or {})
    return danbooru_api_whatever(requests.get, thing, params=query)
def danbooru_api_put(thing, data):
    """PUT ``data`` to a Danbooru resource and return the decoded JSON.

    The entries stored under ``auth_data['danbooru']`` are merged into the
    request body; entries in ``data`` win on a key clash.
    """
    # Build a fresh dict so neither the credentials nor ``data`` is mutated.
    payload = {**auth_data['danbooru'], **data}
    # Passed positionally: it lands in the second parameter of the HTTP
    # method callable (the request body for requests.put).
    return danbooru_api_whatever(requests.put, thing, payload)
def get_posts(*tags):
    """Yield every Danbooru post matching ``tags``, one page at a time.

    Pages are requested lazily, starting at page 1, and iteration stops at
    the first page that comes back as an empty list. No attempt is made to
    remove duplicates if a post is added while iterating.
    """
    query = ' '.join(tags)
    page_number = 1
    while True:
        page = danbooru_api_get('posts', {'tags': query, 'page': page_number})
        if page == []:
            # An empty page marks the end of the results.
            return
        yield from page
        page_number += 1
def all_photo_posts(tumblr_domain):
    """Yield ``(image_url, post_url)`` pairs for the photos of a Tumblr blog.

    ``tumblr_domain`` is the blog hostname used in the Tumblr API URL.
    Photo posts are requested lazily, advancing the ``offset`` parameter by
    the number of posts received, until a request returns no posts.

    For each photo, the ``original_size`` URL is yielded first when present,
    followed by the URL of every entry in ``alt_sizes``; each is paired with
    the ``post_url`` of the post the photo belongs to.

    Raises whatever ``response.raise_for_status()`` raises when the API
    answers with an error status.
    """
    url = 'https://api.tumblr.com/v2/blog/{}/posts'.format(tumblr_domain)
    params = {
        'type': 'photo',
        'offset': 0,
        'api_key': auth_data['tumblr']['api_key'],
    }
    while True:
        # requests waits forever by default; cap the wait so a stalled
        # connection fails instead of hanging the crawl.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        posts = response.json()['response']['posts']
        if not posts:
            return
        for post in posts:
            for photo in post['photos']:
                if 'original_size' in photo:
                    yield (photo['original_size']['url'], post['post_url'])
                # Treat a missing 'alt_sizes' like a missing
                # 'original_size': nothing to yield, not a crash.
                for size in photo.get('alt_sizes', ()):
                    yield (size['url'], post['post_url'])
        params['offset'] += len(posts)
def _blog_from_arg(arg):
    """Return the blog hostname named by a command-line argument."""
    match = re.match(TUMBLR_URL_RE, arg)
    if match:
        return match.group(1)
    # Custom domain or bare hostname: drop the scheme and any path, since
    # only the hostname belongs in the Tumblr API URL.
    return re.match(r'(?:https?://)?([^/]*)', arg).group(1)


def _image_key(url):
    """Return the part of a Tumblr media URL after the host, or None."""
    match = re.match(IMAGE_URL_RE, url or '')
    return match.group(1) if match else None


def _update_sources(tumblr_blogs, pending):
    """Point Danbooru posts at the Tumblr posts their images come from.

    ``pending`` maps image keys to Danbooru post ids; each entry is removed
    once its post has been updated. Crawling stops as soon as ``pending``
    is empty.
    """
    for blog in tumblr_blogs:
        for image_url, tumblr_post_url in all_photo_posts(blog):
            key = _image_key(image_url)
            if key not in pending:
                # Either not a media.tumblr.com URL or not an image we need.
                continue
            post_id = pending.pop(key)
            print('Post #{} -> {}'.format(post_id, tumblr_post_url))
            danbooru_api_put('posts/{}'.format(post_id), {
                'post[source]': tumblr_post_url
            })
            if not pending:
                # Everything is fixed; no point fetching more pages.
                return


def main(argv):
    """Replace Tumblr image sources on an artist's Danbooru posts.

    ``argv[1]`` is an artist name or a Danbooru artist URL. Any further
    arguments name the Tumblr blogs to search; when none are given, the
    blogs are taken from the URLs of the artist entry.

    Credentials are read from ``auth.json`` in the current directory and
    stored in the module-level ``auth_data`` used by the API helpers.

    Returns None on success, or an error message string meant to be passed
    to ``sys.exit``. Raises RuntimeError when ``auth.json`` lacks the
    'danbooru' or 'tumblr' section.
    """
    global auth_data

    # Check the command line before touching auth.json so that a bare
    # invocation prints the usage text instead of a file error.
    if len(argv) < 2:
        return 'usage: {} artist_name [tumblr_url ...]'.format(argv[0])

    with open('auth.json', 'r') as f:
        auth_data = json.load(f)
    if not ('danbooru' in auth_data and 'tumblr' in auth_data):
        raise RuntimeError('auth stuff not provided')

    artist_url_match = re.match(
        r'https?://danbooru\.donmai\.us/artists/([^/]+)', argv[1])
    print('Looking up artist...')
    if artist_url_match:
        artist = danbooru_api_get('artists/' + artist_url_match.group(1))
    else:
        artist_name = argv[1].replace(' ', '_')
        candidates = danbooru_api_get(
            'artists', {'search[name]': 'name:' + artist_name})
        for artist in candidates:
            # Look for an exact match
            if artist['name'] == artist_name:
                break
        else:  # No break means nothing matched
            return 'No such artist: {!r}'.format(artist_name)

    # Blogs given on the command line take precedence...
    tumblr_blogs = [_blog_from_arg(arg) for arg in argv[2:]]
    if not tumblr_blogs:
        # ...otherwise use the tumblr.com URLs of the artist entry.
        url_matches = (
            re.match(TUMBLR_URL_RE, url['normalized_url'])
            for url in artist['urls']
        )
        tumblr_blogs = [match.group(1) for match in url_matches if match]
    if not tumblr_blogs:
        return 'No tumblr blog(s) found.'

    # One query per scheme: OR-ing the two source: terms with '~' doesn't
    # work, and a single 'source:http*://...' pattern works but is
    # technically wrong and might be harder on the database.
    candidate_posts = itertools.chain(
        get_posts(artist['name'], 'source:https://*.media.tumblr.com/'),
        get_posts(artist['name'], 'source:http://*.media.tumblr.com/'),
    )
    posts_needing_update = {}
    for post in candidate_posts:
        key = _image_key(post['source'])
        if key is None:
            # The wildcard search can return sources IMAGE_URL_RE rejects;
            # skip them rather than crash on a failed match.
            continue
        posts_needing_update[key] = post['id']

    if not posts_needing_update:
        print('No posts with tumblr media sources found.')
        return None
    print('Found {} posts needing update.'.format(len(posts_needing_update)))

    print('Using tumblr blog(s):', ', '.join(tumblr_blogs))
    _update_sources(tumblr_blogs, posts_needing_update)

    if not posts_needing_update:
        print('All sources fixed.')
        return None
    return 'Could not find sources for {} post(s): {}'.format(
        len(posts_needing_update),
        ', '.join(map(str, sorted(posts_needing_update.values(), reverse=True)))
    )
if __name__ == '__main__':
    import sys

    # main() returns None on success or an error message on failure; used
    # as the SystemExit argument, a message is printed to stderr and gives
    # exit status 1, while None gives exit status 0.
    raise SystemExit(main(sys.argv))