6

[Golang] Get Photos and Videos in Instagram Post

 2 years ago
source link: http://siongui.github.io/2018/02/14/go-get-photo-video-in-instagram-post/
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
neoserver,ios ssh client

[Golang] Get Photos and Videos in Instagram Post

February 14, 2018

An interesting small program that gets the URLs of all photos and videos in an Instagram post.

This program uses only the Go standard library; no third-party packages are required.

To access the Instagram API from a local Go program, you need to log in to Instagram and copy the following cookie values from your browser:

  • ds_user_id
  • sessionid
  • csrftoken
ds_user_id sessionid csrftoken

Please see this Stack Overflow answer (https://stackoverflow.com/a/44773079) to learn how to get the above values in the Chrome browser.

Given the URL of the post as follows:

https://www.instagram.com/p/BfJzG64BZVY/

The code of the post is BfJzG64BZVY. We will pass this code as one of the arguments to our function call.

post.go | repository | view raw

package igmedia

import (
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"time"
)

// Request settings shared by every call made to Instagram.
const (
	// urlPost is the URL template of a post; the "{{CODE}}" placeholder is
	// substituted with the code of the post by codeToUrl.
	urlPost = "https://www.instagram.com/p/{{CODE}}/?__a=1"

	// userAgent is the value sent in the User-Agent header of each request.
	userAgent = `Instagram 10.3.2 (iPhone7,2; iPhone OS 9_3_3; en_US; en-US; scale=2.00; 750x1334) AppleWebKit/420+`
)

// postInfo mirrors the top-level JSON document that GetPostInfo decodes.
// Only the "graphql" -> "shortcode_media" object is mapped; GetPostInfo
// returns that object to its caller.
type postInfo struct {
	GraphQL struct {
		// ShortcodeMedia is the media object of the requested post.
		ShortcodeMedia EdgeMedia `json:"shortcode_media"`
	} `json:"graphql"`
}

// EdgeMedia is the decoded form of one media object: either the post itself
// (the "shortcode_media" object) or one child of a multi-item post (a "node"
// under "edge_sidecar_to_children").
type EdgeMedia struct {
	// Typename is the media kind. The printing helpers in this package
	// recognize "GraphImage", "GraphVideo" and "GraphSidecar".
	Typename string `json:"__typename"`
	// Shortcode is the code of the post, as used in the post URL.
	Shortcode  string `json:"shortcode"`
	Dimensions struct {
		Height int64 `json:"height"`
		Width  int64 `json:"width"`
	} `json:"dimensions"`
	// DisplayUrl is the "display_url" value of the media object.
	DisplayUrl string `json:"display_url"`
	// DisplayResources lists the available renditions of the image.
	// getImageUrl treats the last entry as the best resolution.
	DisplayResources []struct {
		Src          string `json:"src"`
		ConfigWidth  int64  `json:"config_width"`
		ConfigHeight int64  `json:"config_height"`
	} `json:"display_resources"`
	// VideoUrl is the "video_url" value of the media object.
	VideoUrl string `json:"video_url"`
	IsVideo  bool   `json:"is_video"`
	// TakenAtTimestamp is interpreted by this package as Unix time in seconds.
	TakenAtTimestamp int64 `json:"taken_at_timestamp"`
	Location         struct {
		Id            string `json:"id"`
		HasPublicPage bool   `json:"has_public_page"`
		Name          string `json:"name"`
		Slug          string `json:"slug"`
	} `json:"location"`
	// EdgeSidecarToChildren holds the child media of a "GraphSidecar" post.
	EdgeSidecarToChildren struct {
		Edges []struct {
			Node EdgeMedia `json:"node"`
		} `json:"edges"`
	} `json:"edge_sidecar_to_children"`
}

// getImageUrl returns the URL of the image with the best resolution, which is
// taken to be the last entry of DisplayResources. When the response carried
// no display resources it falls back to DisplayUrl (possibly the empty
// string) instead of indexing an empty slice and panicking.
func (em *EdgeMedia) getImageUrl() string {
	res := em.DisplayResources
	if len(res) == 0 {
		return em.DisplayUrl
	}
	return res[len(res)-1].Src
}

// getVideoUrl returns the URL of the video, or the empty string when the
// media object has none.
func (em *EdgeMedia) getVideoUrl() string {
	return em.VideoUrl
}

// printEdgeMediaChildInfo writes one child of a multi-item post to standard
// output: the type name, then the image or video URL, then an empty line. The
// first two lines are indented by three spaces. It panics with the type name
// when the type is neither "GraphImage" nor "GraphVideo".
func (em *EdgeMedia) printEdgeMediaChildInfo() {
	const pad = "   "
	fmt.Println(pad + em.Typename)

	var mediaURL string
	switch kind := em.Typename; kind {
	case "GraphVideo":
		mediaURL = em.getVideoUrl()
	case "GraphImage":
		mediaURL = em.getImageUrl()
	default:
		panic(kind)
	}

	fmt.Println(pad + mediaURL)
	fmt.Println("")
}

// printEdgeMediaInfo writes a summary of the post to standard output: the type
// name, the post URL without its query string, the media link(s), the time
// the post was taken and a trailing empty line. For a "GraphSidecar" post
// every child is printed through printEdgeMediaChildInfo. It panics with the
// type name when the type is not one of "GraphImage", "GraphVideo" or
// "GraphSidecar".
func (em *EdgeMedia) printEdgeMediaInfo() {
	fmt.Println(em.Typename)

	postURL := stripQueryString(codeToUrl(em.Shortcode))
	fmt.Println(postURL)

	// Media (photo and video) links.
	switch kind := em.Typename; kind {
	case "GraphSidecar":
		fmt.Println("")
		children := em.EdgeSidecarToChildren.Edges
		for i := range children {
			children[i].Node.printEdgeMediaChildInfo()
		}
	case "GraphVideo":
		fmt.Println(em.getVideoUrl())
	case "GraphImage":
		fmt.Println(em.getImageUrl())
	default:
		panic(kind)
	}

	printTimestamp(em.TakenAtTimestamp)
	fmt.Println("")
}

// codeToUrl builds the URL of a post from the code of the post by
// substituting the code for the first "{{CODE}}" placeholder in urlPost.
func codeToUrl(code string) string {
	const placeholder = "{{CODE}}"
	postURL := strings.Replace(urlPost, placeholder, code, 1)
	return postURL
}

// printTimestamp writes the timestamp, rendered by formatTimestamp, as one
// line on standard output.
func printTimestamp(timestamp int64) {
	formatted := formatTimestamp(timestamp)
	fmt.Println(formatted)
}

// formatTimestamp renders a Unix time given in seconds as an RFC 3339 string.
// The value comes from time.Unix, so it is expressed in the local time zone.
func formatTimestamp(timestamp int64) string {
	return time.Unix(timestamp, 0).Format(time.RFC3339)
}

// stripQueryString returns inputUrl without its query string. A fragment, if
// any, is kept.
//
// A URL that ends in a bare "?" is parsed with ForceQuery set, which would
// make String() emit the "?" again, so ForceQuery is cleared together with
// RawQuery.
//
// When inputUrl cannot be parsed the function does not panic; it falls back
// to a textual strip that removes everything from the first "?" up to the
// fragment (or the end of the string).
func stripQueryString(inputUrl string) string {
	u, err := url.Parse(inputUrl)
	if err != nil {
		base, fragment := inputUrl, ""
		if i := strings.IndexByte(base, '#'); i >= 0 {
			base, fragment = base[:i], base[i:]
		}
		if i := strings.IndexByte(base, '?'); i >= 0 {
			base = base[:i]
		}
		return base + fragment
	}
	u.RawQuery = ""
	u.ForceQuery = false
	return u.String()
}

// getHTTPResponse sends a GET request to url on behalf of a specific Instagram
// user and returns the response body.
//
// The ds_user_id, sessionid and csrftoken arguments are sent as cookies of the
// same names, and userAgent is sent as the User-Agent header. After logging in
// to Instagram, the cookie values can be read in Chrome Developer Tools.
// See https://stackoverflow.com/a/44773079
// or
// https://github.com/hoschiCZ/instastories-backup#obtain-cookies
//
// An error is returned when the request cannot be built or sent, when it does
// not complete within the timeout, when the status code is not 200, or when
// the body cannot be read.
func getHTTPResponse(url, ds_user_id, sessionid, csrftoken string) (b []byte, err error) {
	// requestTimeout bounds the whole exchange (connect, headers and body).
	// A zero-value http.Client never times out, so a stalled server would
	// otherwise block the caller forever.
	const requestTimeout = 30 * time.Second

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}

	req.AddCookie(&http.Cookie{Name: "ds_user_id", Value: ds_user_id})
	req.AddCookie(&http.Cookie{Name: "sessionid", Value: sessionid})
	req.AddCookie(&http.Cookie{Name: "csrftoken", Value: csrftoken})

	req.Header.Set("User-Agent", userAgent)

	client := &http.Client{Timeout: requestTimeout}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, errors.New(
			"resp.StatusCode: " +
				strconv.Itoa(resp.StatusCode))
	}

	return ioutil.ReadAll(resp.Body)
}

// GetPostInfo fetches the post identified by code and returns its decoded
// media information. The ds_user_id, sessionid and csrftoken values are
// passed on to getHTTPResponse, which sends them as cookies. On a request or
// JSON decoding error the zero EdgeMedia is returned together with the error.
func GetPostInfo(code, ds_user_id, sessionid, csrftoken string) (em EdgeMedia, err error) {
	body, err := getHTTPResponse(codeToUrl(code), ds_user_id, sessionid, csrftoken)
	if err != nil {
		return em, err
	}

	var decoded postInfo
	if err = json.Unmarshal(body, &decoded); err != nil {
		return em, err
	}
	return decoded.GraphQL.ShortcodeMedia, nil
}

Example:

post_test.go | repository | view raw

package igmedia

import (
	"os"
	"testing"
)

// ExampleGetPostInfo fetches the post named by the IG_TEST_CODE environment
// variable, using the cookie values in IG_DS_USER_ID, IG_SESSIONID and
// IG_CSRFTOKEN, and prints its media information.
//
// It performs a live request to Instagram, so it is skipped when IG_TEST_CODE
// is not set rather than requesting a post with an empty code and reporting a
// failure.
//
// NOTE(review): Go tooling only recognizes Example functions that take no
// parameters, and only runs Test functions that take *testing.T. With this
// name and signature the function is compiled but presumably never executed
// by "go test" — confirm, and consider renaming it to TestGetPostInfo.
func ExampleGetPostInfo(t *testing.T) {
	code := os.Getenv("IG_TEST_CODE")
	if code == "" {
		t.Skip("IG_TEST_CODE is not set; skipping live Instagram request")
	}

	em, err := GetPostInfo(code,
		os.Getenv("IG_DS_USER_ID"),
		os.Getenv("IG_SESSIONID"),
		os.Getenv("IG_CSRFTOKEN"))
	if err != nil {
		t.Error(err)
		return
	}
	em.printEdgeMediaInfo()
}

The full code is also available on my GitHub repo [1].


Tested on: Ubuntu Linux 17.10, Go 1.9.4.


References:


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK