1) Make sure you have curl, mechanize, matplotlib, and dateutil installed (the script imports all three Python packages and shells out to curl):
- Code: Select all
sudo apt-get install python-matplotlib python-mechanize python-dateutil curl
2) Save the following script as orders.py, make it executable (chmod +x orders.py), and run it as:
- Code: Select all
./orders.py <email> [days [bins...]]
Multiple bin counts can be specified to generate histograms of various granularity in one pass.
Using python Syntax Highlighting
- #!/usr/bin/env python
- ###############################################################################
- #
- # Copyright (C) 2010 Karl Ostmo
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- ###############################################################################
- #
- # Author: Karl Ostmo
- # Date: Mar. 31, 2010
- # Version: 1.5
- # Description: Produces a histogram plot of orders from Google Checkout data
- #
- ###############################################################################
# Expected length of the "gdToken" session cookie; the script refuses to
# continue when the cookie it obtained has a different length.
GDTOKEN_COOKIE_LENGTH = 203
# Longest date range (in days) asked for in a single report request; longer
# spans are fetched as several consecutive requests.
MAX_DAYS_PER_REQUEST = 31
# Fraction of gross sales that is reported as profit.
PROFIT_FRACTION = 0.7
# curl invocation that downloads the order list. Template arguments, in order:
# gdToken cookie value, start date, end date, merchant ID.
RAW_CURL_COMMAND = 'curl --silent --cookie "gdToken=%s" --data "start-date=%s" --data "end-date=%s" --data "financial-state=CHARGED" --data "_type=order-list-request" https://checkout.google.com/cws/v2/Merchant/%d/reportsForm'
# Mobile login page that redirects to the Checkout order list after sign-in.
# The "ltmpl" parameter had been corrupted to "<mpl" (an HTML-decoded "&lt").
MOBILE_CHECKOUT_LOGIN_PAGE = "https://www.google.com/accounts/ServiceLogin?service=sierra&continue=https://checkout.google.com/sell/orders&hl=en_US&nui=5&ltmpl=mobilec&rm=hide&skipvpage=true&btmpl=mobile"
- # =============================================================================
def getToken(email):
    """Log in to Google Checkout and return ``(merchant_id, gdtoken)``.

    Opens the mobile login page, prompts for the account password on the
    terminal, submits the first form on the page, reads the merchant ID out
    of the response body and looks up the ``gdToken`` cookie in the browser's
    cookie jar.

    Args:
        email: Account e-mail address entered into the login form.

    Returns:
        A tuple ``(merchant_id, gdtoken)``: the merchant ID as an int and the
        value of the ``gdToken`` cookie.

    Raises:
        RuntimeError: if the login page does not have the expected title, the
            response contains no "Merchant ID: <digits>" text, or no
            ``gdToken`` cookie was set.
    """
    import getpass
    import re

    from mechanize import Browser

    br = Browser()
    br.open(MOBILE_CHECKOUT_LOGIN_PAGE)
    if br.title() != "Google Checkout":
        raise RuntimeError("Unexpected login page title: %r" % (br.title(),))

    br.select_form(nr=0)
    br["Email"] = email
    br["Passwd"] = getpass.getpass(prompt="Enter password for " + email + ": ")
    response = br.submit()

    page = response.read()
    # On Python 3 the body arrives as bytes; the regex below needs text.
    if isinstance(page, bytes) and not isinstance(page, str):
        page = page.decode("utf-8", "replace")

    # The backslash in \d had been lost, so the pattern matched a literal "d".
    matches = re.search(r"Merchant ID: (\d+)", page)
    if matches is None:
        raise RuntimeError("Login failed: no merchant ID found in the response")
    merchant_id = int(matches.group(1))

    # NOTE(review): reaches into a private mechanize attribute to get at the
    # cookie jar; confirm this still exists in the installed mechanize version.
    for cookie in br._ua_handlers['_cookies'].cookiejar:
        if cookie.name == "gdToken":
            return merchant_id, cookie.value
    raise RuntimeError("Login failed: no gdToken cookie was set")
- # =============================================================================
class Order:
    """A single order: the date it was placed and its cost."""

    def __init__(self, date, cost):
        self.date, self.cost = date, cost

    def __repr__(self):
        # Rendered as "$<cost> on <date>".
        return "$%s on %s" % (str(self.cost), str(self.date))
- # =============================================================================
def parse_orders(merchant_id, gdtoken, maxdays):
    """Download the order list for the last ``maxdays`` days.

    The overall range (today back to ``maxdays`` days ago) is fetched newest
    first in chunks of at most ``MAX_DAYS_PER_REQUEST`` days, one curl call
    per chunk. Each response is parsed as CSV: the first row is skipped as a
    header, column 2 is parsed as the order date and column 4 as the cost.

    Args:
        merchant_id: Integer merchant ID substituted into the report URL.
        gdtoken: Value of the ``gdToken`` cookie sent with each request.
        maxdays: Number of days, counted back from today, to cover.

    Returns:
        A list of ``Order`` objects sorted by ascending date. Empty when
        ``maxdays`` is not positive or no chunk contained orders.
    """
    import csv
    import shlex
    import subprocess
    from datetime import date, timedelta

    from dateutil import parser

    overall_enddate = date.today()
    overall_startdate = overall_enddate - timedelta(days=maxdays)
    enddate = overall_enddate
    orders = []
    # NOTE(review): consecutive chunks share their boundary date (one chunk's
    # start is the next chunk's end); confirm the service does not return the
    # boundary day's orders twice.
    while enddate > overall_startdate:
        startdate = max(overall_startdate, enddate - timedelta(days=MAX_DAYS_PER_REQUEST))
        bound_curl_command = RAW_CURL_COMMAND % (gdtoken, startdate.isoformat(), enddate.isoformat(), merchant_id)
        enddate = startdate
        # NOTE: the echoed command contains the gdToken session cookie.
        print(bound_curl_command)

        # Run curl directly from an argument list instead of through a shell
        # (the Python-2-only ``commands`` module); stderr is merged into
        # stdout, as ``commands.getoutput`` did.
        process = subprocess.Popen(
            shlex.split(bound_curl_command),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
        output = process.communicate()[0]
        lines = output.splitlines()
        if not lines:
            print("No data returned for this range (curl exit status %d)" % process.returncode)
            continue

        # v1.3 Update:
        # Ignore time spans without data
        if "No orders were found for this request." in lines[0]:
            print("No orders in this range")
            continue

        input_reader = csv.reader(lines, delimiter=',', quotechar='"')
        next(input_reader, None)  # Skip the header row
        for row in input_reader:
            if len(row) < 5:
                # Blank or truncated line: neither date nor cost column exists.
                continue
            order_date = parser.parse(row[2])
            orders.append(Order(order_date, float(row[4])))

    orders.sort(key=lambda x: x.date)
    return orders
- # =============================================================================
class OrderBin:
    """One histogram bucket: its start date and the orders assigned to it."""

    def __init__(self, start_date, orders):
        self.start_date, self.orders = start_date, orders
- # =============================================================================
def histogram_orders(orders, bins):
    """Group orders into equal-width time bins, newest bin first.

    The span between the earliest and latest order date is divided by
    ``bins`` to get the bin width. Bins are then filled walking backwards
    from the latest order date.

    v1.2 Update:
    Aggregates backwards in time so as not to suggest a sudden slump in
    sales from a partially filled bin.

    Args:
        orders: Objects with a ``date`` attribute; they are sorted by date
            internally, so any input order is accepted.
        bins: Positive number of bins to divide the date span into.

    Returns:
        A tuple ``(bin_array, bin_timespan)``: the list of ``OrderBin``
        objects (newest first; the last one is the partial/overflow bin) and
        the width of one bin. For empty ``orders`` this is
        ``([], timedelta(0))``.

    Raises:
        ValueError: if ``bins`` is not positive.
    """
    from datetime import timedelta

    if bins <= 0:
        # A zero count divides by zero; a negative one makes the bin start
        # move forward in time so the loop below would never end.
        raise ValueError("bins must be a positive integer")
    if not orders:
        return [], timedelta(0)

    # The backwards walk below relies on ascending date order.
    orders = sorted(orders, key=lambda x: x.date)
    min_date = orders[0].date
    max_date = orders[-1].date
    bin_timespan = (max_date - min_date) / bins

    if not bin_timespan:
        # All orders share (nearly) the same timestamp: with a zero-width bin
        # the start date would never move and the loop would not terminate.
        # Put everything in a single bin, newest first like the normal path.
        return [OrderBin(min_date, orders[::-1])], bin_timespan

    bin_array = []
    order_index = len(orders) - 1
    bin_start_date = max_date
    while bin_start_date >= min_date:
        bin_start_date -= bin_timespan
        timespan_array = []
        bin_array.append(OrderBin(bin_start_date, timespan_array))
        # Consume every not-yet-binned order that is at or after this bin's start.
        while order_index >= 0 and orders[order_index].date >= bin_start_date:
            timespan_array.append(orders[order_index])
            order_index -= 1
    return bin_array, bin_timespan
- # =============================================================================
def plot_orders(orders_histogram, bin_timespan):
    """Build a line chart of gross sales per bin and return its canvas.

    The last bin of ``orders_histogram`` is treated as the partial bin and
    left out of the plot. The x axis shows each bin's start date formatted
    as month/day; the y axis shows the summed order cost of the bin.

    Args:
        orders_histogram: Sequence of bins, each with ``start_date`` and
            ``orders`` attributes (orders carry a ``cost``).
        bin_timespan: Width of one bin as a ``timedelta``; used only for the
            y-axis label.

    Returns:
        A matplotlib figure canvas wrapping the figure. The GTKCairo canvas
        is used when that backend can be imported, otherwise the Agg canvas.
    """
    from matplotlib.pyplot import setp
    from matplotlib.figure import Figure, SubplotParams
    from matplotlib.dates import DateFormatter
    try:
        from matplotlib.backends.backend_gtkcairo import FigureCanvasGTKCairo as FigureCanvas
    except ImportError:
        # The GTK backends are not shipped with newer matplotlib releases (and
        # need pygtk); the Agg canvas needs no GUI toolkit.
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

    f = Figure(figsize=(4, 4), subplotpars=SubplotParams(bottom=0.2, left=0.15))
    main_axis = f.add_subplot(111)

    complete_bins = list(orders_histogram)[:-1]  # Exclude the partial bin
    date_values = [order_bin.start_date for order_bin in complete_bins]
    sales_values = [sum(order.cost for order in order_bin.orders) for order_bin in complete_bins]

    main_axis.plot_date(date_values, sales_values, ls='-', marker='None')
    main_axis.xaxis.set_major_formatter(DateFormatter('%m/%d'))
    setp(main_axis.get_xmajorticklabels(), size=6, rotation=70)
    main_axis.set_xlabel("Date")
    # Bin width expressed in (fractional) days.
    main_axis.set_ylabel("Gross $ Sales per %.1f days" % (bin_timespan.days + bin_timespan.seconds/(3600*24.0)))
    return FigureCanvas(f)
- # =============================================================================
# Command-line entry point: ./orders.py <email> [days [bins...]]
# Logs in, downloads the orders for the requested number of days, prints sales
# totals and writes one "sales_<N>_bins.pdf" chart per requested bin count.
if __name__ == "__main__":
    import sys

    argv = sys.argv
    try:
        merchant_id, gdtoken = getToken(argv[1])
        print("Your Merchant ID is: %s" % merchant_id)
        if len(gdtoken) != GDTOKEN_COOKIE_LENGTH:
            print("gdToken must be %d characters." % GDTOKEN_COOKIE_LENGTH)
            sys.exit(1)

        maxdays = 40
        histogram_bins = [20]
        if len(argv) > 2:
            maxdays = int(argv[2])
        # Was "> 4", which silently ignored a single bin-count argument.
        if len(argv) > 3:
            histogram_bins = [int(arg) for arg in argv[3:]]

        # maxdays is a divisor below and the bin count is a divisor when
        # computing the bin width, so both have to be positive.
        if maxdays <= 0:
            raise ValueError("days must be a positive integer")
        if any(bin_count <= 0 for bin_count in histogram_bins):
            raise ValueError("bin counts must be positive integers")
    except Exception as e:
        print(e)
        print("Usage: ./orders.py <email> [days [bins...]]")
        sys.exit(1)

    orders = parse_orders(merchant_id, gdtoken, maxdays)
    if not orders:
        # Nothing to bin or plot.
        print("No orders found in the last %d days." % maxdays)
        sys.exit(0)

    for bin_count in histogram_bins:
        orders_histogram, bin_timespan = histogram_orders(orders, bin_count)

        order_total = sum(len(order_bin.orders) for order_bin in orders_histogram)
        print("Total number of orders: %d" % order_total)

        gross_income = sum(
            order.cost
            for order_bin in orders_histogram
            for order in order_bin.orders
        )
        print("Gross sales: $%.2f" % gross_income)
        print("Profit percentage: %.0f%%" % (PROFIT_FRACTION*100))

        net_income = PROFIT_FRACTION*gross_income
        print("Net sales to date: $%.2f" % (net_income))
        print("Avg. profit per day: $%.2f" % (net_income / maxdays))

        canvas = plot_orders(orders_histogram, bin_timespan)
        chart_title = "sales_" + str(bin_count) + "_bins"
        canvas.print_figure(chart_title + ".pdf", format="pdf", transparent=True)
Parsed in 0.031 seconds, using GeSHi 1.0.8.4

