HOWTO: Graph/plot Android Market sales from Google Checkout

General topics about the Android-Platform itself.
Please post coding issues in the subforum right below.

HOWTO: Graph/plot Android Market sales from Google Checkout

Postby kostmo » Mon Dec 07, 2009 12:28 pm

***UPDATE***: I just posted an app on the Android Market to automate this process and make much nicer plots: http://sites.google.com/site/droidful/my-apps/devrev-developer-revenue

1) Make sure you have curl, mechanize and matplotlib installed
Code: Select all
sudo apt-get install python-matplotlib python-mechanize curl

2) Save the following script as orders.py, make it executable (chmod +x orders.py), and run it as:
Code: Select all
./orders.py <email> [days [bins...]]

Multiple bin counts can be specified to generate histograms of various granularity in one pass.
Syntax: [ Download ] [ Hide ]
Using python Syntax Highlighting
  1. #!/usr/bin/env python
  2.  
  3. ###############################################################################
  4. #
  5. # Copyright (C) 2010 Karl Ostmo
  6. #
  7. # Licensed under the Apache License, Version 2.0 (the "License");
  8. # you may not use this file except in compliance with the License.
  9. # You may obtain a copy of the License at
  10. #
  11. #      http://www.apache.org/licenses/LICENSE-2.0
  12. #
  13. # Unless required by applicable law or agreed to in writing, software
  14. # distributed under the License is distributed on an "AS IS" BASIS,
  15. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. # See the License for the specific language governing permissions and
  17. # limitations under the License.
  18. #
  19. ###############################################################################
  20. #
  21. # Author: Karl Ostmo
  22. # Date: Mar. 31, 2010
  23. # Version: 1.5
  24. # Description: Produces a histogram plot of orders from Google Checkout data
  25. #
  26. ###############################################################################
  27.  
  28. GDTOKEN_COOKIE_LENGTH = 203  # the main script exits with an error unless the gdToken cookie has exactly this many characters
  29. MAX_DAYS_PER_REQUEST = 31  # parse_orders() never requests a wider date span than this in a single call
  30. PROFIT_FRACTION = 0.7  # net income is reported as this fraction of gross sales
  31. RAW_CURL_COMMAND = 'curl --silent --cookie "gdToken=%s" --data "start-date=%s" --data "end-date=%s" --data "financial-state=CHARGED" --data "_type=order-list-request" https://checkout.google.com/cws/v2/Merchant/%d/reportsForm'  # placeholders, in order: gdToken cookie value, start date, end date, merchant ID
  32. MOBILE_CHECKOUT_LOGIN_PAGE = "https://www.google.com/accounts/ServiceLogin?service=sierra&continue=https://checkout.google.com/sell/orders&hl=en_US&nui=5&ltmpl=mobilec&rm=hide&skipvpage=true&btmpl=mobile"  # login URL opened by getToken()
  33.  
  34. # =============================================================================
  35. def getToken(email):
  36.         from mechanize import Browser
  37.         br = Browser()
  38.         br.open(MOBILE_CHECKOUT_LOGIN_PAGE)
  39.         if br.title() == "Google Checkout":
  40.                 br.select_form(nr=0)
  41.                 br["Email"] = email
  42.                 import getpass
  43.                 br["Passwd"] = getpass.getpass(prompt="Enter password for " + email + ": ")
  44.                 response = br.submit()
  45.  
  46.                 import re
  47.                 matches = re.search("Merchant ID: (d+)", response.read())
  48.                 merchant_id = int(matches.group(1))
  49.  
  50.                 for cookie in br._ua_handlers['_cookies'].cookiejar:
  51.                         if cookie.name == "gdToken":
  52.                                 return merchant_id, cookie.value
  53.  
  54. # =============================================================================
  55. class Order:
  56.         def __init__(self, date, cost):
  57.                 self.date = date
  58.                 self.cost = cost
  59.  
  60.         def __repr__(self):
  61.                 return "$" + str(self.cost) + " on " + str(self.date)
  62.  
  63. # =============================================================================
  64. def parse_orders(merchant_id, gdtoken, maxdays):
  65.         from dateutil import parser
  66.         from datetime import date, timedelta
  67.         import csv, commands
  68.  
  69.         overall_enddate = date.today()
  70.         overall_startdate = overall_enddate - timedelta(days=maxdays)
  71.         enddate = overall_enddate
  72.         startdate = None
  73.  
  74.         orders = []
  75.         while enddate > overall_startdate:
  76.  
  77.                 startdate = max(overall_startdate, enddate - timedelta(days=MAX_DAYS_PER_REQUEST))
  78.                 bound_curl_command = RAW_CURL_COMMAND % (gdtoken, startdate.isoformat(), enddate.isoformat(), merchant_id)
  79.                 enddate = startdate
  80.                 print bound_curl_command
  81.                 output = commands.getoutput(bound_curl_command)
  82.                 filehandle = output.split("n")
  83.                 # v1.3 Update:
  84.                 # Ignore time spans without data
  85.                 if filehandle and "No orders were found for this request." in filehandle[0]:
  86.                         print "No orders in this range"
  87.                         continue
  88.  
  89.                 input_reader = csv.reader(filehandle, delimiter=',', quotechar='"')
  90.                 input_reader.next()     # Skip first line
  91.                 for line in input_reader:
  92. #                       from datetime import datetime
  93. #                       date = datetime.strptime(line[2], '%b %d, %Y %I:%M:%S %p')
  94.                         date = parser.parse(line[2])
  95.                         orders.append( Order(date, float(line[4])) )
  96.  
  97.         orders.sort(key=lambda x: x.date)
  98.         return orders
  99.  
  100. # =============================================================================
  101. class OrderBin:
  102.         def __init__(self, start_date, orders):
  103.                 self.start_date = start_date
  104.                 self.orders = orders
  105.  
  106. # =============================================================================
  107. def histogram_orders(orders, bins):
  108.         # v1.2 Update:
  109.         # Aggregates backwards in time so as not to suggest a sudden slump in
  110.         # sales from a partially filled bin
  111.  
  112.         order_dates = [x.date for x in orders]
  113.         min_date = min(order_dates)
  114.         max_date = max(order_dates)
  115.         bin_timespan = (max_date - min_date)/bins
  116.  
  117.         bin_array = []
  118.         order_index = len(orders) - 1
  119.         bin_start_date = max_date
  120.         while bin_start_date >= min_date:
  121.  
  122.                 bin_start_date -= bin_timespan
  123.  
  124.                 timespan_array = []
  125.                 bin_array.append( OrderBin( bin_start_date, timespan_array ) )
  126.                 while order_index >= 0 and orders[order_index].date >= bin_start_date:
  127.                         timespan_array.append( orders[order_index] )
  128.                         order_index -= 1
  129.  
  130.         return bin_array, bin_timespan
  131.  
  132. # =============================================================================
  133. def plot_orders(orders_histogram, bin_timespan):
  134.  
  135.         from matplotlib.pyplot import setp
  136.         from matplotlib.figure import Figure, SubplotParams
  137.         from matplotlib.backends.backend_gtkcairo import FigureCanvasGTKCairo as FigureCanvas
  138.         from matplotlib.dates import DateFormatter
  139.  
  140.         f = Figure(figsize=(4, 4), subplotpars=SubplotParams(bottom=0.2, left=0.15))
  141.         main_axis = f.add_subplot(111)
  142.  
  143.         date_values = [bin.start_date for bin in orders_histogram][:-1] # Exclude the partial bin
  144.         sales_values = [sum([order.cost for order in bin.orders]) for bin in orders_histogram][:-1]     # Exclude the partial bin
  145.  
  146.         p1 = main_axis.plot_date(date_values, sales_values, ls='-', marker='None')
  147.  
  148.         main_axis.xaxis.set_major_formatter(DateFormatter('%m/%d'))
  149.         setp(main_axis.get_xmajorticklabels(), size=6, rotation=70)
  150.  
  151.         main_axis.set_xlabel("Date")
  152.         main_axis.set_ylabel("Gross $ Sales per %.1f days" % (bin_timespan.days + bin_timespan.seconds/(3600*24.0)))
  153.        
  154.         return FigureCanvas( f )
  155.  
  156. # =============================================================================
  157. if __name__ == "__main__":
  158.  
  159.         from sys import argv
  160.         try:
  161. #               merchant_id = long(argv[1])
  162.                 merchant_id, gdtoken = getToken(argv[1])
  163.                 print "Your Merchant ID is:", merchant_id
  164.                 if len(gdtoken) != GDTOKEN_COOKIE_LENGTH:
  165.                         print "gdToken must be %d characters." % GDTOKEN_COOKIE_LENGTH
  166.                         exit(1)
  167.  
  168.                 maxdays = 40
  169.                 histogram_bins = [20]
  170.                 if len(argv) > 2:
  171.                         maxdays = int(argv[2])
  172.                         if len(argv) > 4:
  173.                                 histogram_bins = map(int,argv[3:])
  174.         except Exception, e:
  175.                 print e
  176.                 print "Usage: ./orders.py <email> [days [bins]]"
  177.                 exit(1)
  178.  
  179.         orders = parse_orders(merchant_id, gdtoken, maxdays)
  180.  
  181. #       print "Total number of orders, pre-histogram:", len(orders)
  182. #       print "Gross sales, pre-histogram: $%.2f" % sum([x.cost for x in orders])
  183.  
  184.         for bin_count in histogram_bins:
  185.                 orders_histogram, bin_timespan = histogram_orders(orders, bin_count)
  186.  
  187.                 print "Total number of orders:", sum([sum([len(bin.orders)]) for bin in orders_histogram])
  188.                 gross_income = sum([sum([order.cost for order in bin.orders]) for bin in orders_histogram])
  189.                 print "Gross sales: $%.2f" % gross_income
  190.  
  191.                 print "Profit percentage: %.0f%%" % (PROFIT_FRACTION*100)
  192.                 net_income = PROFIT_FRACTION*gross_income
  193.  
  194.                 print "Net sales to date: $%.2f" % (net_income)
  195.                 print "Avg. profit per day: $%.2f" % (net_income / maxdays)
  196.  
  197.                 canvas = plot_orders(orders_histogram, bin_timespan)
  198.  
  199.                 chart_title = "sales_" + str(bin_count) + "_bins"
  200.                 canvas.print_figure(chart_title + ".pdf", format="pdf", transparent=True)
  201.  
Parsed in 0.028 seconds, using GeSHi 1.0.8.4
Attachments
sales.png
Sales of my apps over the past 150 days
sales.png (10.54 KiB) Viewed 1709 times
kostmo
Freshman
Freshman
 
Posts: 5
Joined: Thu Sep 24, 2009 4:23 am

Top

Return to General

Who is online

Users browsing this forum: No registered users and 2 guests