From 3affee9bc0d714c8d6c7bfe4f770cadd98d99876 Mon Sep 17 00:00:00 2001 From: Robert Kaplow Date: Tue, 2 Aug 2011 16:26:51 -0400 Subject: [PATCH 1/2] patch --- examples/prediction/prediction.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/prediction/prediction.rb b/examples/prediction/prediction.rb index 5520f6e66..7eebc295d 100644 --- a/examples/prediction/prediction.rb +++ b/examples/prediction/prediction.rb @@ -6,7 +6,7 @@ # Original Author:: Bob Aman, Winton Davies, Robert Kaplow # Maintainer:: Robert Kaplow (mailto:rkaplow@google.com) -$:.unshift('lib') +$LOAD_PATH:.unshift File.dirname('lib') require 'rubygems' require 'sinatra' require 'datamapper' @@ -61,7 +61,7 @@ before do @client.authorization.redirect_uri = to('/oauth2callback') # Workaround for now as expires_in may be nil, but when converted to int it becomes 0. - @client.authorization.expires_in = Time.now + 1800 if @client.authorization.expires_in.to_i == 0 + @client.authorization.expires_in = 1800 if @client.authorization.expires_in.to_i == 0 if session[:token_id] # Load the access token here if it's available From 320b04935494bf0722c2862309222ea309ab5ce2 Mon Sep 17 00:00:00 2001 From: Robert Kaplow Date: Tue, 2 Aug 2011 17:06:06 -0400 Subject: [PATCH 2/2] read --- examples/prediction/README | 53 ++++++++++++++++++++++++++++--- examples/prediction/prediction.rb | 38 +++++++++++++--------- 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/examples/prediction/README b/examples/prediction/README index b6c7a12c0..aaf39b0ce 100644 --- a/examples/prediction/README +++ b/examples/prediction/README @@ -1,14 +1,38 @@ +APIs Console Project Setup: +------------ +If you have not yet, you must set your APIs Console project to enable Prediction +API and Google Storage. Go to APIs Console https://code.google.com/apis/console/ +and select the project you want to use. Next, go to Services, and enable both +Prediction API and Google Storage. 
You may also need to enable Billing (Billing) +in the left menu. + + Data Setup: ---------- Before you can run the prediction sample prediction.rb, you must load some csv -formatted data into Google Storage. You can do this by running setup.sh with a -bucket/object name of your choice. You must first create the bucket you want to -use. This can be done with the gsutil function or via the web UI (Storage -Access) in the Google APIs Console. i.e.: +formatted data into Google Storage. + +1 - You must first create the bucket you want to use. This can be done +with the gsutil function or via the web UI (Storage Access) in the Google +APIs Console. i.e. +# gsutil mb gs://BUCKET + +OR + +Go to APIs Console -> Storage Access (on left) and the Google Storage Manager, +and create your bucket there. + +2 - We now load the data you want to use to Google Storage. We have supplied a +basic language identification dataset in the sample for testing. + # chmod 744 setup.sh # ./setup.sh BUCKET/OBJECT Note you need gsutil in your path for this to work. +If you have your own dataset, you can do this manually as well. +gsutil cp your_dataset.csv gs://BUCKET/your_dataset.csv + + In the script, you must then modify the datafile string. This must correspond with the bucket/object of your dataset (if you are using your own dataset). We have provided a setup.sh which will upload some basic sample data. The section is @@ -28,7 +52,7 @@ API. You can also set it up so the user can grant access. First, run the google-api script to generate access and refresh tokens. Ex. 
# cd google-api-ruby-client -# ruby-1.9.2-p290 bin/google-api oauth-2-login --scope=https://www.googleapis.com/auth/prediction --client-id=NUMBER.apps.googleusercontent.com --client-secret=CLIENT_SECRET +# ruby bin/google-api oauth-2-login --scope=https://www.googleapis.com/auth/prediction --client-id=NUMBER.apps.googleusercontent.com --client-secret=CLIENT_SECRET Fill in your client-id and client-secret from the API Access page. You will probably have to set a redirect URI in your client ID @@ -46,6 +70,25 @@ you are loading it as a yaml, ensure you rename/move the file, as the move the .google-api.yaml file to the sample directory. +Usage : +------- +At this point, you should have + - Enabled your APIs Console account + - Created a storage bucket, if required + - Uploaded some data to Google Storage + - Modified the script to point the 'datafile' variable to the BUCKET/OBJECT name + - Modified the script to put your credentials in, either in the code or by + loading the generated .yaml file + +We can now run the service! +# ruby prediction.rb + +This should start a service on http://localhost:4567. When you hit the service, +your ruby logs should show the Prediction API calls, and print the prediction +output in the debug. + + + This sample currently does not cover some newer features of Prediction API such as streaming training, hosted models or class weights. If there are any questions or suggestions to improve the script please email us at diff --git a/examples/prediction/prediction.rb b/examples/prediction/prediction.rb index 7eebc295d..ff054a960 100644 --- a/examples/prediction/prediction.rb +++ b/examples/prediction/prediction.rb @@ -6,7 +6,6 @@ # Original Author:: Bob Aman, Winton Davies, Robert Kaplow # Maintainer:: Robert Kaplow (mailto:rkaplow@google.com) -$LOAD_PATH:.unshift File.dirname('lib') require 'rubygems' require 'sinatra' require 'datamapper' @@ -109,6 +108,7 @@ get '/' do # Do a prediction. 
# FILL IN DESIRED INPUT: # ------------------------------------------------------------------------------- + # Note, the input features should match the features of the dataset. prediction,score = get_prediction(datafile, ["Alice noticed with some surprise."]) # ------------------------------------------------------------------------------- @@ -127,10 +127,11 @@ end def train(datafile) input = "{\"id\" : \"#{datafile}\"}" puts "training input: #{input}" - status, headers, body = @client.execute(@prediction.training.insert, - {}, - input, - {'Content-Type' => 'application/json'}) + result = @client.execute(:api_method => @prediction.training.insert, + :merged_body => input, + :headers => {'Content-Type' => 'application/json'} + ) + status, headers, body = result.response end ## @@ -141,8 +142,9 @@ end # then the correct string is "bucket/object" # @return [Integer] status The HTTP status code of the training job. def get_training_status(datafile) - status, headers, body = @client.execute(@prediction.training.get, - {'data' => datafile}) + result = @client.execute(:api_method => @prediction.training.get, + :parameters => {'data' => datafile}) + status, headers, body = result.response return status end @@ -157,11 +159,14 @@ end def is_done?(datafile) status = get_training_status(datafile) - while true do + # We use a linear backoff here: the wait grows by 5 seconds per attempt, up to 10 attempts. + test_counter = 0 + while test_counter < 10 do puts "Attempting to check model #{datafile} - Status: #{status} " return true if status == 200 - sleep 10 + sleep 5 * (test_counter + 1) status = get_training_status(datafile) + test_counter += 1 end return false end @@ -184,12 +189,15 @@ def get_prediction(datafile,input_features) # We take the input features and put it in the right input (json) format. 
input="{\"input\" : { \"csvInstance\" : #{input_features}}}" puts "Prediction Input: #{input}" - status, headers, body = @client.execute(@prediction.training.predict, - {'data' => datafile}, - input, - {'Content-Type' => 'application/json'}) - prediction_data = JSON.parse(body[0]) - + result = @client.execute(:api_method => @prediction.training.predict, + :parameters => {'data' => datafile}, + :merged_body => input, + :headers => {'Content-Type' => 'application/json'}) + status, headers, body = result.response + prediction_data = result.data + puts status + puts body + puts prediction_data # Categorical if prediction_data["outputLabel"] != nil # Pull the most likely label.