Jake.codes

SearchLink, for Pythonista

Tuesday, January 14, 2014

A Pythonista-based fork of Omz’s Editorial-based fork of Brett Terpstra’s Ruby-based SearchLink

# Original SearchLink by Brett Terpstra (@ttscoff)
# 
# Project page:
# http://brettterpstra.com/projects/searchlink/
# 
# Port to Python/Editorial by Ole Zorn (@olemoritz)
# Based on SearchLink 2.0
#
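# Bugfixes and conversion from Editorial to Drafts.app support
# by Jake Bilbrey
#
# Run it from Drafts.app with a URL action that passes the draft text
# as the script's first argument:
# pythonista://SearchLink?action=run&argv=[[draft]]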

# Configuration:

# Set to True to force inline links ([text](url)). When False, links are
# written reference-style ([text][n]) and their definitions are collected
# in a footer appended to the output.
inline = True

# Set to True to add a title to each link, taken from the title of the
# search result that was found.
include_titles = False

# Country passed to the iTunes / App Store search (default US).
country_code = 'US'

# Set to True to prefix reference markers with a random 4-digit string.
# Allows running SearchLink multiple times on the same text without
# marker conflicts.
prefix_random = False

# Query-string fragment appended to iTunes URLs for affiliate tracking;
# use empty quotes ('') for none.
# NOTE(review): the value below looks like a personal affiliate token --
# replace it with your own or clear it before use.
itunes_affiliate = '&at=1l3v3KV&ct=searchlink'

# To create Amazon affiliate links, set amazon_partner to:
# [tag, camp, creative]
# Use the Amazon link tool to create any affiliate link and examine it
# to find the needed parts. Set to False to return regular Amazon links.
# NOTE(review): the values below look like a personal affiliate account --
# replace them with your own or set to False before use.
amazon_partner = ["jakebilbrcom-20","1789","390957"]

# To create custom abbreviations for Google site searches,
# add to (or replace) the dict below using the form
#   'abbreviation': 'site.url',
# This allows you, for example, to use [search term](!bt)
# as a shortcut to search brettterpstra.com. Keys in this
# dict override the built-in search triggers of the same name.
custom_site_searches = {
  'bt': 'brettterpstra.com',
  'md': 'www.macdrifter.com'
}

import requests
import json
import re
import urllib
import random
import sys
import webbrowser

# The text to process is handed over as the first command-line argument.
input_ = sys.argv[1]

# When reference-style output is configured but the text contains exactly
# one "](" sequence, switch to inline output for this run.
if not inline:
    inline_link_count = len(re.findall(r'\]\(', input_))
    if inline_link_count == 1:
        inline = True

def clean_string(s):
    """Sanitise a result title so it can be used as a Markdown link title.

    Runs of newlines become a single space, double quotes become the HTML
    entity ``&quot;`` (the title itself is wrapped in double quotes by the
    callers), ``|`` becomes ``-`` and surrounding whitespace is stripped.

    s -- the raw title string
    Returns the cleaned string.
    """
    s = re.sub(r'\n+', ' ', s)
    # The entity must end with ';' to be well-formed.
    s = s.replace('"', '&quot;')
    s = s.replace('|', '-')
    return s.strip()

# TODO: Read custom config...

def wiki(terms):
    """Look up ``terms`` as an English Wikipedia page title.

    terms -- the page title to query

    Returns a ``(url, title)`` tuple for the first page in the API
    response, or ``(None, None)`` when the response contains no pages or
    the page is flagged as missing. Always returning a pair keeps
    ``url, title = wiki(...)`` safe for callers.
    """
    uri = 'http://en.wikipedia.org/w/api.php?action=query&format=json&prop=info&inprop=url&titles=' + urllib.quote(terms)
    headers = {
        'Referer': 'http://brettterpstra.com',
        'User-Agent': 'SearchLink (http://brettterpstra.com)',
    }
    result = json.loads(requests.get(uri, headers=headers).text)
    pages = (result or {}).get('query', {}).get('pages')
    if not pages:
        return (None, None)
    # "pages" is keyed by page id; take whichever entry comes first.
    first_page = next(iter(pages.values()))
    # The API marks titles that do not exist with a "missing" key.
    if 'missing' in first_page:
        return (None, None)
    return (first_page.get('fullurl'), first_page.get('title'))

def zero_click(terms):
    """Query the DuckDuckGo instant-answer API for ``terms``.

    Returns a 4-tuple ``(title, definition, definition_link, wiki_link)``
    read from the ``Heading``, ``Definition``, ``DefinitionURL`` and
    ``AbstractURL`` fields of the response. Fields absent from the
    response are ``None``; an empty response yields four ``None`` values.
    """
    query = urllib.quote(terms)
    url = 'http://api.duckduckgo.com/?q=%s&format=json&no_redirect=1&no_html=1&skip_disambig=1' % query
    answer = json.loads(requests.get(url).text)
    if not answer:
        return (None, None, None, None)
    wanted_fields = ('Heading', 'Definition', 'DefinitionURL', 'AbstractURL')
    return tuple(answer.get(field) for field in wanted_fields)

def itunes(entity, terms, dev, aff='', country_code='US'):
    """Search the iTunes Store and return the first hit.

    entity       -- iTunes entity name; ``macSoftware`` / ``iPadSoftware``
                    and ``musicArtist`` / ``song`` / ``album`` are handled
    terms        -- search terms
    dev          -- for software, return the seller's own URL instead of
                    the store page; no affiliate string is appended then
    aff          -- affiliate query fragment appended to store URLs
    country_code -- store country used for the search

    Returns ``(url, title)``, or ``(None, None)`` when there are no
    results or the entity / result type is not one handled here.
    """
    url = 'http://itunes.apple.com/search?term=%s&country=%s&entity=%s&attribute=allTrackTerm' % (urllib.quote(terms), country_code, entity)
    json_dict = json.loads(requests.get(url).text)
    results = json_dict.get('results') or []
    if not json_dict.get('resultCount') or not results:
        return (None, None)

    result = results[0]
    output_url = None
    output_title = None
    append_aff = not dev

    if re.match(r'(mac|iPad)Software', entity):
        output_title = result['trackName']
        if dev and result.get('sellerUrl'):
            output_url = result['sellerUrl']
        else:
            # Not every result carries a sellerUrl; fall back to the
            # store page (with affiliate info) rather than raising.
            output_url = result['trackViewUrl']
            append_aff = True
    elif re.match(r'(musicArtist|song|album)', entity):
        wrapper_type = result.get('wrapperType')
        if wrapper_type == 'track':
            output_url = result['trackViewUrl']
            output_title = result['trackName'] + ' by ' + result['artistName']
        elif wrapper_type == 'collection':
            output_url = result['collectionViewUrl']
            output_title = result['collectionName'] + ' by ' + result['artistName']
        elif wrapper_type == 'artist':
            output_url = result['artistLinkUrl']
            output_title = result['artistName']

    # Unknown entity or wrapper type: report "nothing found" instead of
    # failing on unbound variables.
    if not output_url:
        return (None, None)
    if append_aff:
        output_url += aff
    return (output_url, output_title)

def lastfm(entity, terms):
    """Search Last.fm and return the first match.

    entity -- ``'track'`` or ``'artist'``
    terms  -- search terms

    Returns ``(url, title)``; the title is ``"<name> by <artist>"`` for
    tracks and the artist name for artists. Returns ``(None, None)`` when
    the response has no results, the expected fields are absent, or
    ``entity`` is not one of the two supported values.
    """
    api_url = 'http://ws.audioscrobbler.com/2.0/?method=%s.search&%s=%s&api_key=2f3407ec29601f97ca8a18ff580477de&format=json' % (entity, entity, urllib.quote(terms))
    json_dict = json.loads(requests.get(api_url).text)
    results = json_dict.get('results', None)
    if not results:
        return (None, None)
    try:
        if entity == 'track':
            hit = results['trackmatches']['track'][0]
            return (hit['url'], hit['name'] + ' by ' + hit['artist'])
        if entity == 'artist':
            hit = results['artistmatches']['artist'][0]
            return (hit['url'], hit['name'])
    except (KeyError, IndexError, TypeError):
        # The match container was missing, empty, or not shaped as a
        # list of dicts -- treat all of these as "no match".
        return (None, None)
    # Unsupported entity: nothing to return.
    return (None, None)

def google(terms, define=False):
    """Return the top Google web-search hit for ``terms``.

    terms  -- search terms
    define -- when True and the hit's URL contains "dictionary", use the
              result snippet (with HTML tags removed) as the title
              instead of the page title

    Returns a two-item list ``[url, title]``, or ``[None, None]`` when the
    response carries no results, so ``url, title = google(...)`` is
    always safe for callers.

    NOTE(review): this uses the Google AJAX Search API endpoint, which
    Google has deprecated -- confirm it still responds.
    """
    uri = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&filter=1&rsz=small&q=' + urllib.quote(terms)
    headers = {
        'Referer': 'http://brettterpstra.com',
        'User-Agent': 'SearchLink (http://brettterpstra.com)',
    }
    json_dict = json.loads(requests.get(uri, headers=headers).text)
    # "responseData" may be absent or null; treat both as "no results".
    response_data = json_dict.get('responseData', None) or {}
    results = response_data.get('results', None)
    if not results:
        return [None, None]
    top = results[0]
    output_url = top['unescapedUrl']
    # re.search, not re.match: the word appears inside the URL, never at
    # its very start.
    if define and re.search(r'dictionary', output_url):
        output_title = re.sub(r'<\/?.*?>', '', top['content'])
    else:
        output_title = top['titleNoFormatting']
    return [output_url, output_title]

def amazon_affiliatize(url, amazon_partner):
    """Rewrite an Amazon product URL as an affiliate link.

    url            -- a URL, expected to look like
                      ``http(s)://www.amazon.com/[<slug>/]dp/<id>``
    amazon_partner -- ``[tag, camp, creative]``, or a false value to
                      disable affiliate rewriting

    Always returns a ``(url, title)`` tuple. For a recognised product URL
    with affiliate info configured, ``url`` is the affiliate link and
    ``title`` is the slug preceding ``/dp/`` (``''`` if there is none).
    In every other case the URL is returned unchanged with ``''`` as the
    title.
    """
    if not amazon_partner or not url:
        # Same return shape as every other path, so callers can unpack.
        return (url, '')
    url_match = re.match(r'https?://www\.amazon\.com/(?:(.*?)/)?dp/([^/?#]+)', url)
    if not url_match:
        return (url, '')
    title = url_match.group(1) or ''
    # The id is only the path segment after /dp/; anything following it
    # (extra path segments, query string) is dropped.
    id_ = url_match.group(2)
    az_url = 'http://www.amazon.com/gp/product/%s/ref=as_li_ss_tl?ie=UTF8&camp=%s&creative=%s&creativeASIN=%s&linkCode=as2&tag=%s' % (id_, amazon_partner[1], amazon_partner[2], id_, amazon_partner[0])
    return (az_url, title)

# State shared with the link-replacement code below:
#   links          -- maps each resolved URL to its reference marker
#   footer         -- accumulated reference definitions
#   prefix         -- optional random "NNNN-" prefix for new markers
#   highest_marker -- largest numeric marker already present in the input
links = {}
footer = ''
prefix = ('%04d' % random.randint(0, 9999)) + '-' if prefix_random else ''
highest_marker = 0

# Scan the input for existing reference definitions ("[3]: ..." or
# "[<prefix>3]: ...") so new markers are numbered after them. The prefix
# already ends in '-', so no extra dash is added, and it is escaped
# before being placed in the pattern.
for match in re.finditer(r'\[(?:%s)?(\d+)\]: ' % re.escape(prefix), input_):
    marker = int(match.group(1))
    highest_marker = max(marker, highest_marker)

# Matches a Markdown inline link: group 1 is the text, group 2 the target.
_LINK_PATTERN = r'\[(.*?)\]\((.*?)\)'

# Search triggers that map onto an iTunes music entity / a Last.fm entity.
_ITUNES_MUSIC_ENTITIES = {'isong': 'song', 'iart': 'musicArtist', 'ialb': 'album'}
_LASTFM_ENTITIES = {'lsong': 'track', 'lart': 'artist'}


def _as_pair(found):
    """Normalise a lookup result to a ``(url, title)`` pair.

    Defensive: accepts a two-item tuple/list, a bare URL string, or a
    false value (returned as ``(None, None)``), so callers can always
    unpack the result.
    """
    if not found:
        return (None, None)
    if isinstance(found, (tuple, list)):
        return (found[0], found[1])
    return (found, None)


def _parse_link(link_text, link_info):
    """Work out ``(search_type, search_terms)`` for one ``[text](info)``.

    Recognised forms of ``info``:
      ``"terms"`` / ``!type "terms"`` -- explicit terms, optional type
      ``!type``                       -- search for the link text
      (empty)                         -- Google search for the link text
    Anything else (for example a link that already has a URL) yields
    ``('', '')`` so the link is left untouched.
    """
    quoted = re.match(r'^(?:\!(.+) )?"(.*?)"$', link_info)
    if quoted:
        return (quoted.group(1) or 'g', quoted.group(2))
    if link_info.startswith('!'):
        # A lone "!" names no type; '' falls through to a plain search.
        bang = re.match(r'^\!(.+)', link_info)
        return (bang.group(1) if bang else '', link_text)
    if link_text and not link_info:
        return ('g', link_text)
    return ('', '')


def _lookup(search_type, search_terms):
    """Run the search selected by ``search_type``; return ``(url, title)``.

    Returns ``(None, None)`` when there is nothing to search for or the
    search produced no result.
    """
    if not search_terms:
        return (None, None)

    # Custom abbreviations become Google site searches.
    if search_type in custom_site_searches:
        search_terms = 'site:%s %s' % (custom_site_searches[search_type], search_terms)
        search_type = 'g'

    if search_type == 'a':
        az_url, _ = _as_pair(google('site:amazon.com %s' % search_terms, False))
        if not az_url:
            return (None, None)
        return _as_pair(amazon_affiliatize(az_url, amazon_partner))
    if search_type == 'g':  # google lucky search
        return _as_pair(google(search_terms))
    if search_type == 'wiki':
        return _as_pair(wiki(search_terms))
    if search_type == 'def':
        return _as_pair(google("define " + search_terms, True))
    if search_type in ('mas', 'masd'):  # Mac App Store (masd = developer link)
        return _as_pair(itunes('macSoftware', search_terms, search_type == 'masd', itunes_affiliate, country_code))
    if search_type in ('itu', 'itud'):  # iTunes app search (itud = developer link)
        return _as_pair(itunes('iPadSoftware', search_terms, search_type == 'itud', itunes_affiliate, country_code))
    if search_type == 's':  # software search (google)
        return _as_pair(google('(software OR app OR mac) %s' % search_terms))
    if search_type in _ITUNES_MUSIC_ENTITIES:
        # Pass the configured affiliate string and country here too, as
        # the software searches do.
        return _as_pair(itunes(_ITUNES_MUSIC_ENTITIES[search_type], search_terms, False, itunes_affiliate, country_code))
    if search_type in _LASTFM_ENTITIES:
        return _as_pair(lastfm(_LASTFM_ENTITIES[search_type], search_terms))
    # A type that looks like a domain name is treated as a site search;
    # anything else falls back to a plain Google search.
    if re.match(r'.+?\.\w{2,4}$', search_type):
        return _as_pair(google('site:%s %s' % (search_type, search_terms)))
    return _as_pair(google(search_terms))


if re.search(_LINK_PATTERN, input_):
    def repl(match):
        """Return the replacement for one matched ``[text](info)`` link.

        Links that request no search, or whose search finds nothing, are
        returned unchanged. In reference mode, new definitions are
        appended to the module-level ``footer``.
        """
        global footer
        link_text = match.group(1)
        search_type, search_terms = _parse_link(link_text, match.group(2))
        url, title = _lookup(search_type, search_terms)

        if not url:
            return match.group(0)
        if link_text == '' and title:
            link_text = title

        if inline:
            if title and include_titles:
                return '[%s](%s "%s")' % (link_text, url, clean_string(title))
            return '[%s](%s)' % (link_text, url)

        # Reference style: register each URL once and reuse its marker.
        if url not in links:
            links[url] = prefix + ('%d' % (len(links) + 1 + highest_marker))
            footer += '\n[%s]: %s' % (links[url], url)
            if title and include_titles:
                # A space must separate the URL from the quoted title.
                footer += ' "%s"' % clean_string(title)

        if title:
            return '[%s][%s]' % (link_text, links[url])
        return '[%s](%s)' % (link_text, url)

    result = re.sub(_LINK_PATTERN, repl, input_)
else:
    # No links in the input: treat the whole text as one Google search.
    url, title = _as_pair(google(input_))
    if not url:
        # Nothing found -- hand the text back unchanged.
        result = input_
    elif include_titles and title:
        result = '[%s](%s "%s")' % (input_.strip(), url, clean_string(title))
    else:
        result = '[%s](%s)' % (input_.strip(), url)

# In reference mode, put the collected link definitions after the text.
if footer and not inline:
    result = result + '\n' + footer

# Hand the finished text to Drafts as a new draft via its URL scheme.
encoded_text = urllib.quote(result.encode('utf-8'))
draftsURL = 'drafts://x-callback-url/create?text=' + encoded_text
webbrowser.open(draftsURL)