Files
stash/vendor/github.com/chromedp/chromedp/chromedp.go
2020-08-04 10:42:40 +10:00

513 lines
15 KiB
Go

// Package chromedp is a high level Chrome DevTools Protocol client that
// simplifies driving browsers for scraping, unit testing, or profiling web
// pages using the CDP.
//
// chromedp requires no third-party dependencies, implementing the async Chrome
// DevTools Protocol entirely in Go.
//
// This package includes a number of simple examples. Additionally,
// https://github.com/chromedp/examples contains more complex examples.
package chromedp
import (
"context"
"fmt"
"sync"
"time"
"github.com/chromedp/cdproto/browser"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/css"
"github.com/chromedp/cdproto/dom"
"github.com/chromedp/cdproto/inspector"
"github.com/chromedp/cdproto/log"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/cdproto/runtime"
"github.com/chromedp/cdproto/target"
)
// Context is attached to any context.Context which is valid for use with Run.
type Context struct {
// Allocator is used to create new browsers. It is inherited from the
// parent context when using NewContext.
Allocator Allocator
// Browser is the browser being used in the context. It is inherited
// from the parent context when using NewContext.
Browser *Browser
// Target is the target to run actions (commands) against. It is not
// inherited from the parent context, and typically each context will
// have its own unique Target pointing to a separate browser tab (page).
Target *Target
// targetID is set up by WithTargetID. If nil, Run will pick the only
// unused page target, or create a new one.
targetID target.ID
browserListeners []cancelableListener
targetListeners []cancelableListener
// browserOpts holds the browser options passed to NewContext via
// WithBrowserOption, so that they can later be used when allocating a
// browser in Run.
browserOpts []BrowserOption
// cancel simply cancels the context that was used to start Browser.
// This is useful to stop all activity and avoid deadlocks if we detect
// that the browser was closed or happened to crash. Note that this
// cancel function doesn't do any waiting.
cancel func()
// first records whether this context created a brand new Chrome
// process. This is important, because its cancellation should stop the
// entire browser and its handler, and not just a portion of its pages.
first bool
// closedTarget allows waiting for a target's page to be closed on
// cancellation.
closedTarget sync.WaitGroup
// allocated is closed when an allocated browser completely stops. If no
// browser needs to be allocated, the channel is simply not initialised
// and remains nil.
allocated chan struct{}
// cancelErr is the first error encountered when cancelling this
// context, for example if a browser's temporary user data directory
// couldn't be deleted.
cancelErr error
}
// NewContext creates a chromedp context from the parent context. The parent
// context's Allocator is inherited, defaulting to an ExecAllocator with
// DefaultExecAllocatorOptions.
//
// If the parent context contains an allocated Browser, the child context
// inherits it, and its first Run creates a new tab on that browser. Otherwise,
// its first Run will allocate a new browser.
//
// Cancelling the returned context will close a tab or an entire browser,
// depending on the logic described above. To cancel a context while checking
// for errors, see Cancel.
//
// Note that NewContext doesn't allocate nor start a browser; that happens the
// first time Run is used on the context.
func NewContext(parent context.Context, opts ...ContextOption) (context.Context, context.CancelFunc) {
ctx, cancel := context.WithCancel(parent)
c := &Context{cancel: cancel, first: true}
if pc := FromContext(parent); pc != nil {
c.Allocator = pc.Allocator
c.Browser = pc.Browser
// don't inherit Target, so that NewContext can be used to
// create a new tab on the same browser.
c.first = c.Browser == nil
// TODO: make this more generic somehow.
if _, ok := c.Allocator.(*RemoteAllocator); ok {
c.first = false
}
}
if c.Browser == nil {
// set up the semaphore for Allocator.Allocate
c.allocated = make(chan struct{}, 1)
c.allocated <- struct{}{}
}
for _, o := range opts {
o(c)
}
if c.Allocator == nil {
c.Allocator = setupExecAllocator(DefaultExecAllocatorOptions[:]...)
}
ctx = context.WithValue(ctx, contextKey{}, c)
c.closedTarget.Add(1)
go func() {
<-ctx.Done()
defer c.closedTarget.Done()
if c.first {
// This is the original browser tab, so the entire
// browser will already be cleaned up elsewhere.
return
}
if c.Target == nil {
// This is a new tab, but we didn't create it and attach
// to it yet. Nothing to do.
return
}
// Not the original browser tab; simply detach and close it.
// We need a new context, as ctx is cancelled; use a 1s timeout.
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
if id := c.Target.SessionID; id != "" {
action := target.DetachFromTarget().WithSessionID(id)
if err := action.Do(cdp.WithExecutor(ctx, c.Browser)); c.cancelErr == nil {
c.cancelErr = err
}
}
if id := c.Target.TargetID; id != "" {
action := target.CloseTarget(id)
if ok, err := action.Do(cdp.WithExecutor(ctx, c.Browser)); c.cancelErr == nil {
if !ok && err == nil {
err = fmt.Errorf("could not close target %q", id)
}
c.cancelErr = err
}
}
}()
cancelWait := func() {
cancel()
c.closedTarget.Wait()
// If we allocated, wait for the browser to stop.
if c.allocated != nil {
<-c.allocated
}
}
return ctx, cancelWait
}
type contextKey struct{}
// FromContext extracts the Context data stored inside a context.Context.
func FromContext(ctx context.Context) *Context {
c, _ := ctx.Value(contextKey{}).(*Context)
return c
}
// Cancel cancels a chromedp context, waits for its resources to be cleaned up,
// and returns any error encountered during that process.
//
// If the context allocated a browser, the browser will be closed gracefully by
// Cancel.
//
// Usually a "defer cancel()" will be enough for most use cases. However, Cancel
// is the better option if one wants to gracefully close a browser, or catch
// underlying errors happening during cancellation.
func Cancel(ctx context.Context) error {
c := FromContext(ctx)
if c == nil {
return ErrInvalidContext
}
if c.first && c.Browser != nil {
if err := c.Browser.execute(ctx, browser.CommandClose, nil, nil); err != nil {
return err
}
} else {
c.cancel()
c.closedTarget.Wait()
}
// If we allocated, wait for the browser to stop.
if c.allocated != nil {
<-c.allocated
}
return c.cancelErr
}
// Run runs an action against context. The provided context must be a valid
// chromedp context, typically created via NewContext.
//
// Note that the first time Run is called on a context, a browser will be
// allocated via Allocator. Thus, it's generally a bad idea to use a context
// timeout on the first Run call, as it will stop the entire browser.
func Run(ctx context.Context, actions ...Action) error {
c := FromContext(ctx)
// If c is nil, it's not a chromedp context.
// If c.Allocator is nil, NewContext wasn't used properly.
// If c.cancel is nil, Run is being called directly with an allocator
// context.
if c == nil || c.Allocator == nil || c.cancel == nil {
return ErrInvalidContext
}
if c.Browser == nil {
browser, err := c.Allocator.Allocate(ctx, c.browserOpts...)
if err != nil {
return err
}
c.Browser = browser
c.Browser.listeners = append(c.Browser.listeners, c.browserListeners...)
}
if c.Target == nil {
if err := c.newTarget(ctx); err != nil {
return err
}
}
return Tasks(actions).Do(cdp.WithExecutor(ctx, c.Target))
}
func (c *Context) newTarget(ctx context.Context) error {
if c.targetID != "" {
if err := c.attachTarget(ctx, c.targetID); err != nil {
return err
}
// This new page might have already loaded its top-level frame
// already, in which case we wouldn't see the frameNavigated and
// documentUpdated events. Load them here.
tree, err := page.GetFrameTree().Do(cdp.WithExecutor(ctx, c.Target))
if err != nil {
return err
}
c.Target.cur = tree.Frame
c.Target.documentUpdated(ctx)
return nil
}
if !c.first {
var err error
c.targetID, err = target.CreateTarget("about:blank").Do(cdp.WithExecutor(ctx, c.Browser))
if err != nil {
return err
}
return c.attachTarget(ctx, c.targetID)
}
// This is like WaitNewTarget, but for the entire browser.
ch := make(chan target.ID, 1)
lctx, cancel := context.WithCancel(ctx)
ListenBrowser(lctx, func(ev interface{}) {
var info *target.Info
switch ev := ev.(type) {
case *target.EventTargetCreated:
info = ev.TargetInfo
case *target.EventTargetInfoChanged:
info = ev.TargetInfo
default:
return
}
if info.Type == "page" && info.URL == "about:blank" {
select {
case <-lctx.Done():
case ch <- info.TargetID:
}
cancel()
}
})
// wait for the first blank tab to appear
action := target.SetDiscoverTargets(true)
if err := action.Do(cdp.WithExecutor(ctx, c.Browser)); err != nil {
return err
}
select {
case <-ctx.Done():
return ctx.Err()
case c.targetID = <-ch:
}
return c.attachTarget(ctx, c.targetID)
}
func (c *Context) attachTarget(ctx context.Context, targetID target.ID) error {
sessionID, err := target.AttachToTarget(targetID).WithFlatten(true).Do(cdp.WithExecutor(ctx, c.Browser))
if err != nil {
return err
}
c.Target, err = c.Browser.newExecutorForTarget(ctx, targetID, sessionID)
if err != nil {
return err
}
c.Target.listeners = append(c.Target.listeners, c.targetListeners...)
go c.Target.run(ctx)
for _, action := range []Action{
// enable domains
log.Enable(),
runtime.Enable(),
inspector.Enable(),
page.Enable(),
dom.Enable(),
css.Enable(),
// enable target discovery
target.SetDiscoverTargets(true),
target.SetAutoAttach(true, false).WithFlatten(true),
} {
if err := action.Do(cdp.WithExecutor(ctx, c.Target)); err != nil {
return fmt.Errorf("unable to execute %T: %v", action, err)
}
}
return nil
}
// ContextOption is a context option.
type ContextOption = func(*Context)
// WithTargetID sets up a context to be attached to an existing target, instead
// of creating a new one.
func WithTargetID(id target.ID) ContextOption {
return func(c *Context) { c.targetID = id }
}
// WithLogf is a shortcut for WithBrowserOption(WithBrowserLogf(f)).
func WithLogf(f func(string, ...interface{})) ContextOption {
return WithBrowserOption(WithBrowserLogf(f))
}
// WithErrorf is a shortcut for WithBrowserOption(WithBrowserErrorf(f)).
func WithErrorf(f func(string, ...interface{})) ContextOption {
return WithBrowserOption(WithBrowserErrorf(f))
}
// WithDebugf is a shortcut for WithBrowserOption(WithBrowserDebugf(f)).
func WithDebugf(f func(string, ...interface{})) ContextOption {
return WithBrowserOption(WithBrowserDebugf(f))
}
// WithBrowserOption allows passing a number of browser options to the allocator
// when allocating a new browser. As such, this context option can only be used
// when NewContext is allocating a new browser.
func WithBrowserOption(opts ...BrowserOption) ContextOption {
return func(c *Context) {
if !c.first {
panic("WithBrowserOption can only be used when allocating a new browser")
}
c.browserOpts = append(c.browserOpts, opts...)
}
}
// Targets lists all the targets in the browser attached to the given context.
func Targets(ctx context.Context) ([]*target.Info, error) {
if err := Run(ctx); err != nil {
return nil, err
}
c := FromContext(ctx)
return target.GetTargets().Do(cdp.WithExecutor(ctx, c.Browser))
}
// Action is the common interface for an action that will be executed against a
// context and frame handler.
type Action interface {
// Do executes the action using the provided context and frame handler.
Do(context.Context) error
}
// ActionFunc is a adapter to allow the use of ordinary func's as an Action.
type ActionFunc func(context.Context) error
// Do executes the func f using the provided context and frame handler.
func (f ActionFunc) Do(ctx context.Context) error {
return f(ctx)
}
// Tasks is a sequential list of Actions that can be used as a single Action.
type Tasks []Action
// Do executes the list of Actions sequentially, using the provided context and
// frame handler.
func (t Tasks) Do(ctx context.Context) error {
for _, a := range t {
if err := a.Do(ctx); err != nil {
return err
}
}
return nil
}
// Sleep is an empty action that calls time.Sleep with the specified duration.
//
// Note: this is a temporary action definition for convenience, and will likely
// be marked for deprecation in the future, after the remaining Actions have
// been able to be written/tested.
func Sleep(d time.Duration) Action {
return ActionFunc(func(ctx context.Context) error {
// Don't use time.After, to avoid a temporary goroutine leak if
// ctx is cancelled before the timer fires.
t := time.NewTimer(d)
select {
case <-ctx.Done():
t.Stop()
return ctx.Err()
case <-t.C:
}
return nil
})
}
type cancelableListener struct {
ctx context.Context
fn func(ev interface{})
}
// ListenBrowser adds a function which will be called whenever a browser event
// is received on the chromedp context. Cancelling ctx stops the listener from
// receiving any more events.
//
// Note that the function is called synchronously when handling events. The
// function should avoid blocking at all costs. For example, any Actions must be
// run via a separate goroutine.
func ListenBrowser(ctx context.Context, fn func(ev interface{})) {
c := FromContext(ctx)
if c == nil {
panic(ErrInvalidContext)
}
cl := cancelableListener{ctx, fn}
if c.Browser != nil {
c.Browser.listenersMu.Lock()
c.Browser.listeners = append(c.Browser.listeners, cl)
c.Browser.listenersMu.Unlock()
} else {
c.browserListeners = append(c.browserListeners, cl)
}
}
// ListenTarget adds a function which will be called whenever a target event is
// received on the chromedp context. Note that this only includes browser
// events; command responses and target events are not included. Cancelling ctx
// stops the listener from receiving any more events.
//
// Note that the function is called synchronously when handling events. The
// function should avoid blocking at all costs. For example, any Actions must be
// run via a separate goroutine.
func ListenTarget(ctx context.Context, fn func(ev interface{})) {
c := FromContext(ctx)
if c == nil {
panic(ErrInvalidContext)
}
cl := cancelableListener{ctx, fn}
if c.Target != nil {
c.Target.listenersMu.Lock()
c.Target.listeners = append(c.Target.listeners, cl)
c.Target.listenersMu.Unlock()
} else {
c.targetListeners = append(c.targetListeners, cl)
}
}
// WaitNewTarget can be used to wait for the current target to open a new
// target. Once fn matches a new unattached target, its target ID is sent via
// the returned channel.
func WaitNewTarget(ctx context.Context, fn func(*target.Info) bool) <-chan target.ID {
ch := make(chan target.ID, 1)
lctx, cancel := context.WithCancel(ctx)
ListenTarget(lctx, func(ev interface{}) {
var info *target.Info
switch ev := ev.(type) {
case *target.EventTargetCreated:
info = ev.TargetInfo
case *target.EventTargetInfoChanged:
info = ev.TargetInfo
default:
return
}
if info.OpenerID == "" {
return // not a child target
}
if info.Attached {
return // already attached; not a new target
}
if fn(info) {
select {
case <-lctx.Done():
case ch <- info.TargetID:
}
close(ch)
cancel()
}
})
return ch
}