diff options
Diffstat (limited to 'lib/WWW')
-rw-r--r-- | lib/WWW/Curl.pm | 334 | ||||
-rw-r--r-- | lib/WWW/Curl/Easy.pm | 245 | ||||
-rw-r--r-- | lib/WWW/Curl/Form.pm | 12 | ||||
-rw-r--r-- | lib/WWW/Curl/Multi.pm | 14 | ||||
-rw-r--r-- | lib/WWW/Curl/Share.pm | 49 |
5 files changed, 654 insertions, 0 deletions
diff --git a/lib/WWW/Curl.pm b/lib/WWW/Curl.pm new file mode 100644 index 0000000..cdb52c9 --- /dev/null +++ b/lib/WWW/Curl.pm @@ -0,0 +1,334 @@ +package WWW::Curl; + +use strict; +use warnings; +use vars qw(@ISA $VERSION); +use DynaLoader; + +BEGIN { + $VERSION = '4.05'; + @ISA = qw(DynaLoader); + __PACKAGE__->bootstrap; +} + +1; + +__END__ + +=head1 NAME + +WWW::Curl - Perl extension interface for libcurl + +=head1 SYNOPSIS + + use WWW::Curl; + print $WWW::Curl::VERSION; + + +=head1 DESCRIPTION + +WWW::Curl is a Perl extension interface for libcurl. + +=head1 DOCUMENTATION + +This module provides a Perl interface to libcurl. It is not intended to be a standalone module +and because of this, the main libcurl documentation should be consulted for API details at +L<http://curl.haxx.se>. The documentation you're reading right now only contains the Perl specific +details, some sample code and the differences between the C API and the Perl one. + +=head1 WWW::Curl::Easy + +The name might be confusing, it originates from libcurl. This is not an ::Easy module +in the sense normally used on CPAN. + +Here is a small snippet of making a request with WWW::Curl::Easy. + + use strict; + use warnings; + use WWW::Curl::Easy; + + # Setting the options + my $curl = new WWW::Curl::Easy; + + $curl->setopt(CURLOPT_HEADER,1); + $curl->setopt(CURLOPT_URL, 'http://example.com'); + my $response_body; + + # NOTE - do not use a typeglob here. A reference to a typeglob is okay though. + open (my $fileb, ">", \$response_body); + $curl->setopt(CURLOPT_WRITEDATA,$fileb); + + # Starts the actual request + my $retcode = $curl->perform; + + # Looking at the results... + if ($retcode == 0) { + print("Transfer went ok\n"); + my $response_code = $curl->getinfo(CURLINFO_HTTP_CODE); + # judge result and next action based on $response_code + print("Received response: $response_body\n"); + } else { + print("An error happened: ".$curl->strerror($retcode)." 
($retcode)\n"); + } + + +=head1 WWW::Curl::Multi + + use strict; + use warnings; + use WWW::Curl::Easy; + use WWW::Curl::Multi; + + my %easy; + my $curl = WWW::Curl::Easy->new; + my $curl_id = '13'; # This should be a handle unique id. + $easy{$curl_id} = $curl; + my $active_handles = 0; + + $curl->setopt(CURLOPT_PRIVATE,$curl_id); + # do the usual configuration on the handle + ... + + my $curlm = WWW::Curl::Multi->new; + + # Add some easy handles + $curlm->add_handle($curl); + $active_handles++; + + while ($active_handles) { + my $active_transfers = $curlm->perform; + if ($active_transfers != $active_handles) { + while (my ($id,$return_value) = $curlm->info_read) { + if ($id) { + $active_handles--; + my $actual_easy_handle = $easy{$id}; + # do the usual result/error checking routine here + ... + # letting the curl handle get garbage collected, or we leak memory. + delete $easy{$id}; + } + } + } + } + +This interface is different than what the C API does. $curlm->perform is non-blocking and performs +requests in parallel. The method does a little work and then returns control, therefore it has to be called +periodically to get the job done. Its return value is the number of unfinished requests. + +When the number of unfinished requests changes compared to the number of active handles, $curlm->info_read +should be checked for finished requests. It returns one handle and its return value at a time, or an empty list +if there are no more finished requests. $curlm->info_read calls remove_handle on the given easy handle automatically, +internally. The easy handle will still remain available until it goes out of scope, this action just detaches it from +multi. + +Please make sure that the easy handle does not get garbage collected until after the multi handle finishes processing it, +or bad things happen. + +The multi handle does not need to be cleaned up, when it goes out of scope it calls the required cleanup methods +automatically. 
+ +It is possible to use $curlm->add_handle to add further requests to be processed after $curlm->perform has been called. +WWW::Curl::Multi doesn't care about the order. It is possible to process all requests for a multi handle and then add +a new batch of easy handles for processing. + +=head1 WWW::Curl::Share + + use WWW::Curl::Share; + my $curlsh = new WWW::Curl::Share; + $curlsh->setopt(CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE); + $curlsh->setopt(CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS); + $curl->setopt(CURLOPT_SHARE, $curlsh); + $curlsh->setopt(CURLSHOPT_UNSHARE, CURL_LOCK_DATA_COOKIE); + $curlsh->setopt(CURLSHOPT_UNSHARE, CURL_LOCK_DATA_DNS); + +WWW::Curl::Share is an extension to WWW::Curl::Easy which makes it possible +to use a single cookies/dns cache for several Easy handles. + +Its usable methods are: + + $curlsh = new WWW::Curl::Share + This method constructs a new WWW::Curl::Share object. + + $curlsh->setopt(CURLSHOPT_SHARE, $value ); + Enables share for: + CURL_LOCK_DATA_COOKIE use single cookies database + CURL_LOCK_DATA_DNS use single DNS cache + $curlsh->setopt(CURLSHOPT_UNSHARE, $value ); + Disable share for given $value (see CURLSHOPT_SHARE) + + $curlsh->strerror( ErrNo ) + This method returns a string describing the CURLSHcode error + code passed in the argument errornum. + +This is how you enable sharing for a specific WWW::Curl::Easy handle: + + $curl->setopt(CURLOPT_SHARE, $curlsh) + Attach share object to WWW::Curl::Easy instance + + +=head1 COMPATIBILITY + +=over + +=item curl_easy_setopt + +Most of the options should work, however some might not. Please send reports, tests and patches to fix +those. + +=item curl_easy_escape + +Not implemented. Since equivalent Perl code is easily produced, this method will only be made +available for interface completeness, if ever. + +=item curl_easy_init + +Used only internally. The standard Perl way of initializing an object should be used, + C<< my $curl = WWW::Curl::Easy->new; >>. 
+ +=item curl_easy_cleanup + +Used only internally. Curl object cleanup happens when the handle goes out of scope. + +=item curl_easy_duphandle + +Should be working for most cases, however do not change the value of options which accept +a list/arrayref value on a duped handle, otherwise memory leaks or crashes will happen. +This behaviour will be fixed in the future. + +=item curl_easy_pause + +Not implemented. + +=item curl_easy_reset + +Not implemented. + +=item curl_easy_unescape + +Not implemented. Trivial Perl replacements are available. + +=item curl_escape + +Not implemented and won't be as this method is considered deprecated. + +=item curl_formadd + +Not yet implemented. + +=item curl_formfree + +When WWW::Curl::Form support is added, this function will be used internally, +but won't be accessible from the public API. + +=item curl_free + +Used internally. Not exposed through the public API, as this call has no relevance +to Perl code. + +=item curl_getdate + +Not implemented. This function is easily replaced by Perl code and as such, most likely +it won't be implemented. + +=item curl_global_cleanup + +Only used internally, not exposed through the public API. + +=item curl_global_init + +Only used internally, not exposed through the public API. + +=item curl_global_init_mem + +Not implemented. + +=item curl_slist_append + +Only used internally, not exposed through the public API. + +=item curl_slist_free_all + +Only used internally, not exposed through the public API. + +=item curl_unescape + +Not implemented and won't be, as this method is considered deprecated. + +=item curl_version_info + +Not yet implemented. + +=item curl_multi_* + +Most methods are either not exposed through the WWW::Curl::Multi API or they behave differently +than their C counterparts. Please see the section about WWW::Curl::Multi above. + +=back + +=head1 USAGE CASES + +The standard Perl WWW module, LWP should be used in most cases to work with +the HTTP or FTP protocol from Perl. 
However, there are some cases where LWP doesn't +perform well. One is speed and the other is parallelism. WWW::Curl is much faster, +uses far fewer CPU cycles and it's capable of non-blocking parallel requests. + +In some cases, for example when building a web crawler, cpu usage and parallel downloads are +important considerations. It can be desirable to use WWW::Curl to do the heavy-lifting of +a large number of downloads and wrap the resulting data into a Perl-friendly structure by +HTTP::Response. + +=head1 CHANGES + +Version 4.01 adds several bugfixes. See Changes file. + +Version 4.00 added new documentation, the build system changed to Module::Install, +the test suite was rewritten to use Test::More, a new calling syntax for WWW::Curl::Multi +was added, memory leak and other bugfixes added, Perl 5.6 and libcurl 7.10.8 as minimum +requirements for this module were set. + +Version 3.12 is a bugfix for a missing Share.pm.in file in the release. + +Version 3.11 added WWW::Curl::Share. + +Version 3.10 adds the WWW::Curl::Share interface by Anton Fedorov +and large file options after a contribution from Mark Hindley. + +Version 3.02 adds some backwards compatibility for scripts still using +'WWW::Curl::easy' names. + +Version 3.01 added some support for pre-multi versions of libcurl. + +Version 3.00 adds WWW::Curl::Multi interface, and new module names +following perl conventions (WWW::Curl::Easy rather than WWW::Curl::easy), +by Sebastian Riedel <sri at cpan.org>. + +Version 2.00 of WWW::Curl::easy is a renaming of the previous version +(named Curl::easy), to follow CPAN naming guidelines, by Cris Bailiff. + +Version 1.30, a (hopefully) threadable, object-oriented, +multiple-callback compatible version of Curl::easy was substantially +reworked from the previous Curl::easy release (1.21) by Cris Bailiff. 
+ +=head1 AUTHORS + +Currently maintained by Cris Bailiff <c.bailiff+curl at devsecure.com> + +Original Author Georg Horn <horn@koblenz-net.de>, with additional callback, +pod and test work by Cris Bailiff <c.bailiff+curl@devsecure.com> and +Forrest Cahoon <forrest.cahoon@merrillcorp.com>. Sebastian Riedel added ::Multi +and Anton Fedorov (datacompboy <at> mail.ru) added ::Share. Balint Szilakszi +repackaged the module into a more modern form. + +=head1 COPYRIGHT + +Copyright (C) 2000-2005,2008 Daniel Stenberg, Cris Bailiff, +Sebastian Riedel, Balint Szilakszi et al. + +You may opt to use, copy, modify, merge, publish, distribute and/or sell +copies of the Software, and permit persons to whom the Software is furnished +to do so, under the terms of the MPL or the MIT/X-derivate licenses. You may +pick one of these licenses. + +=head1 SEE ALSO + +http://curl.haxx.se diff --git a/lib/WWW/Curl/Easy.pm b/lib/WWW/Curl/Easy.pm new file mode 100644 index 0000000..a636534 --- /dev/null +++ b/lib/WWW/Curl/Easy.pm @@ -0,0 +1,245 @@ +package WWW::Curl::Easy; + +use strict; +use warnings; +use Carp; +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $AUTOLOAD); + +$VERSION = '4.05'; + +require WWW::Curl; +require Exporter; +require AutoLoader; + +@ISA = qw(Exporter DynaLoader); + +# Items to export into callers namespace by default. Note: do not export +# names by default without a very good reason. Use EXPORT_OK instead. +# Do not simply export all your public functions/methods/constants. 
+ +@EXPORT = qw( +CURLOPT_APPEND +CURLOPT_AUTOREFERER +CURLOPT_BUFFERSIZE +CURLOPT_CAINFO +CURLOPT_CAPATH +CURLOPT_CLOSEPOLICY +CURLOPT_CONNECTTIMEOUT +CURLOPT_CONNECTTIMEOUT_MS +CURLOPT_CONNECT_ONLY +CURLOPT_CONV_FROM_NETWORK_FUNCTION +CURLOPT_CONV_FROM_UTF8_FUNCTION +CURLOPT_CONV_TO_NETWORK_FUNCTION +CURLOPT_COOKIE +CURLOPT_COOKIEFILE +CURLOPT_COOKIEJAR +CURLOPT_COOKIELIST +CURLOPT_COOKIESESSION +CURLOPT_COPYPOSTFIELDS +CURLOPT_CRLF +CURLOPT_CUSTOMREQUEST +CURLOPT_DEBUGDATA +CURLOPT_DEBUGFUNCTION +CURLOPT_DIRLISTONLY +CURLOPT_DNS_CACHE_TIMEOUT +CURLOPT_DNS_USE_GLOBAL_CACHE +CURLOPT_EGDSOCKET +CURLOPT_ENCODING +CURLOPT_ERRORBUFFER +CURLOPT_FAILONERROR +CURLOPT_FILE +CURLOPT_FILETIME +CURLOPT_FOLLOWLOCATION +CURLOPT_FORBID_REUSE +CURLOPT_FRESH_CONNECT +CURLOPT_FTPAPPEND +CURLOPT_FTPLISTONLY +CURLOPT_FTPPORT +CURLOPT_FTPSSLAUTH +CURLOPT_FTP_ACCOUNT +CURLOPT_FTP_ALTERNATIVE_TO_USER +CURLOPT_FTP_CREATE_MISSING_DIRS +CURLOPT_FTP_FILEMETHOD +CURLOPT_FTP_RESPONSE_TIMEOUT +CURLOPT_FTP_SKIP_PASV_IP +CURLOPT_FTP_SSL +CURLOPT_FTP_SSL_CCC +CURLOPT_FTP_USE_EPRT +CURLOPT_FTP_USE_EPSV +CURLOPT_HEADER +CURLOPT_HEADERDATA +CURLOPT_HEADERFUNCTION +CURLOPT_HTTP200ALIASES +CURLOPT_HTTPAUTH +CURLOPT_HTTPGET +CURLOPT_HTTPHEADER +CURLOPT_HTTPPOST +CURLOPT_HTTPPROXYTUNNEL +CURLOPT_HTTP_CONTENT_DECODING +CURLOPT_HTTP_TRANSFER_DECODING +CURLOPT_HTTP_VERSION +CURLOPT_IGNORE_CONTENT_LENGTH +CURLOPT_INFILE +CURLOPT_INFILESIZE +CURLOPT_INFILESIZE_LARGE +CURLOPT_INTERFACE +CURLOPT_IOCTLDATA +CURLOPT_IOCTLFUNCTION +CURLOPT_IPRESOLVE +CURLOPT_KEYPASSWD +CURLOPT_KRB4LEVEL +CURLOPT_KRBLEVEL +CURLOPT_LOCALPORT +CURLOPT_LOCALPORTRANGE +CURLOPT_LOW_SPEED_LIMIT +CURLOPT_LOW_SPEED_TIME +CURLOPT_MAXCONNECTS +CURLOPT_MAXFILESIZE +CURLOPT_MAXFILESIZE_LARGE +CURLOPT_MAXREDIRS +CURLOPT_MAX_RECV_SPEED_LARGE +CURLOPT_MAX_SEND_SPEED_LARGE +CURLOPT_NETRC +CURLOPT_NETRC_FILE +CURLOPT_NEW_DIRECTORY_PERMS +CURLOPT_NEW_FILE_PERMS +CURLOPT_NOBODY +CURLOPT_NOPROGRESS +CURLOPT_NOSIGNAL +CURLOPT_OPENSOCKETDATA 
+CURLOPT_OPENSOCKETFUNCTION +CURLOPT_PORT +CURLOPT_POST +CURLOPT_POST301 +CURLOPT_POSTFIELDS +CURLOPT_POSTFIELDSIZE +CURLOPT_POSTFIELDSIZE_LARGE +CURLOPT_POSTQUOTE +CURLOPT_PREQUOTE +CURLOPT_PRIVATE +CURLOPT_PROGRESSDATA +CURLOPT_PROGRESSFUNCTION +CURLOPT_PROXY +CURLOPT_PROXYAUTH +CURLOPT_PROXYPORT +CURLOPT_PROXYTYPE +CURLOPT_PROXYUSERPWD +CURLOPT_PROXY_TRANSFER_MODE +CURLOPT_PUT +CURLOPT_QUOTE +CURLOPT_RANDOM_FILE +CURLOPT_RANGE +CURLOPT_READFUNCTION +CURLOPT_REFERER +CURLOPT_RESUME_FROM +CURLOPT_RESUME_FROM_LARGE +CURLOPT_SEEKDATA +CURLOPT_SEEKFUNCTION +CURLOPT_SHARE +CURLOPT_SOCKOPTDATA +CURLOPT_SOCKOPTFUNCTION +CURLOPT_SSH_AUTH_TYPES +CURLOPT_SSH_HOST_PUBLIC_KEY_MD5 +CURLOPT_SSH_PRIVATE_KEYFILE +CURLOPT_SSH_PUBLIC_KEYFILE +CURLOPT_SSLCERT +CURLOPT_SSLCERTPASSWD +CURLOPT_SSLCERTTYPE +CURLOPT_SSLENGINE +CURLOPT_SSLENGINE_DEFAULT +CURLOPT_SSLKEY +CURLOPT_SSLKEYPASSWD +CURLOPT_SSLKEYTYPE +CURLOPT_SSLVERSION +CURLOPT_SSL_CIPHER_LIST +CURLOPT_SSL_CTX_DATA +CURLOPT_SSL_CTX_FUNCTION +CURLOPT_SSL_SESSIONID_CACHE +CURLOPT_SSL_VERIFYHOST +CURLOPT_SSL_VERIFYPEER +CURLOPT_STDERR +CURLOPT_TCP_NODELAY +CURLOPT_TELNETOPTIONS +CURLOPT_TIMECONDITION +CURLOPT_TIMEOUT +CURLOPT_TIMEOUT_MS +CURLOPT_TIMEVALUE +CURLOPT_TRANSFERTEXT +CURLOPT_UNRESTRICTED_AUTH +CURLOPT_UPLOAD +CURLOPT_URL +CURLOPT_USERAGENT +CURLOPT_USERPWD +CURLOPT_USE_SSL +CURLOPT_VERBOSE +CURLOPT_WRITEDATA +CURLOPT_WRITEFUNCTION +CURLOPT_WRITEHEADER +CURLOPT_WRITEINFO +CURLINFO_CONNECT_TIME +CURLINFO_CONTENT_LENGTH_DOWNLOAD +CURLINFO_CONTENT_LENGTH_UPLOAD +CURLINFO_CONTENT_TYPE +CURLINFO_COOKIELIST +CURLINFO_DATA_IN +CURLINFO_DATA_OUT +CURLINFO_EFFECTIVE_URL +CURLINFO_END +CURLINFO_FILETIME +CURLINFO_FTP_ENTRY_PATH +CURLINFO_HEADER_IN +CURLINFO_HEADER_OUT +CURLINFO_HEADER_SIZE +CURLINFO_HTTPAUTH_AVAIL +CURLINFO_HTTP_CODE +CURLINFO_HTTP_CONNECTCODE +CURLINFO_LASTONE +CURLINFO_LASTSOCKET +CURLINFO_NAMELOOKUP_TIME +CURLINFO_NONE +CURLINFO_NUM_CONNECTS +CURLINFO_OS_ERRNO +CURLINFO_PRETRANSFER_TIME +CURLINFO_PRIVATE 
+CURLINFO_PROXYAUTH_AVAIL +CURLINFO_REDIRECT_COUNT +CURLINFO_REDIRECT_TIME +CURLINFO_REDIRECT_URL +CURLINFO_REQUEST_SIZE +CURLINFO_RESPONSE_CODE +CURLINFO_SIZE_DOWNLOAD +CURLINFO_SIZE_UPLOAD +CURLINFO_SPEED_DOWNLOAD +CURLINFO_SPEED_UPLOAD +CURLINFO_SSL_DATA_IN +CURLINFO_SSL_DATA_OUT +CURLINFO_SSL_ENGINES +CURLINFO_SSL_VERIFYRESULT +CURLINFO_STARTTRANSFER_TIME +CURLINFO_TEXT +CURLINFO_TOTAL_TIME +); + +$WWW::Curl::Easy::headers = ""; +$WWW::Curl::Easy::content = ""; + +sub AUTOLOAD { + + # This AUTOLOAD is used to 'autoload' constants from the constant() + # XS function. + + ( my $constname = $AUTOLOAD ) =~ s/.*:://; + return constant( $constname, 0 ); +} + +1; + +__END__ + +Copyright (C) 2000-2005,2008 Daniel Stenberg, Cris Bailiff, +Sebastian Riedel, et al. + +You may opt to use, copy, modify, merge, publish, distribute and/or sell +copies of the Software, and permit persons to whom the Software is furnished +to do so, under the terms of the MPL or the MIT/X-derivate licenses. You may +pick one of these licenses. diff --git a/lib/WWW/Curl/Form.pm b/lib/WWW/Curl/Form.pm new file mode 100644 index 0000000..dd69c68 --- /dev/null +++ b/lib/WWW/Curl/Form.pm @@ -0,0 +1,12 @@ +package WWW::Curl::Form; +use strict; + +# In development! +# +#require WWW::Curl; +#use vars qw(@ISA @EXPORT_OK); +#require Exporter; +#require AutoLoader; +# @ISA = qw(Exporter DynaLoader); + +1; diff --git a/lib/WWW/Curl/Multi.pm b/lib/WWW/Curl/Multi.pm new file mode 100644 index 0000000..cc64628 --- /dev/null +++ b/lib/WWW/Curl/Multi.pm @@ -0,0 +1,14 @@ +package WWW::Curl::Multi; + +use strict; +use WWW::Curl; + +1; +__END__ + +Copyright (C) 2004 Sebastian Riedel, et al. + +You may opt to use, copy, modify, merge, publish, distribute and/or sell +copies of the Software, and permit persons to whom the Software is furnished +to do so, under the terms of the MPL or the MIT/X-derivate licenses. You may +pick one of these licenses. 
diff --git a/lib/WWW/Curl/Share.pm b/lib/WWW/Curl/Share.pm new file mode 100644 index 0000000..4da2183 --- /dev/null +++ b/lib/WWW/Curl/Share.pm @@ -0,0 +1,49 @@ +package WWW::Curl::Share; + +use strict; +use warnings; +use Carp; +use vars qw(@ISA @EXPORT @EXPORT_OK $AUTOLOAD); + +use WWW::Curl; +require Exporter; +require AutoLoader; + +@ISA = qw(Exporter DynaLoader); + +@EXPORT = qw( +CURLSHOPT_LAST +CURLSHOPT_LOCKFUNC +CURLSHOPT_NONE +CURLSHOPT_SHARE +CURLSHOPT_UNLOCKFUNC +CURLSHOPT_UNSHARE +CURLSHOPT_USERDATA +CURL_LOCK_DATA_CONNECT +CURL_LOCK_DATA_COOKIE +CURL_LOCK_DATA_DNS +CURL_LOCK_DATA_LAST +CURL_LOCK_DATA_NONE +CURL_LOCK_DATA_SHARE +CURL_LOCK_DATA_SSL_SESSION +); + +sub AUTOLOAD { + + # This AUTOLOAD is used to 'autoload' constants from the constant() + # XS function. + + ( my $constname = $AUTOLOAD ) =~ s/.*:://; + return constant( $constname, 0 ); +} + +1; +__END__ + + +Copyright (C) 2008, Anton Fedorov (datacompboy <at> mail.ru) + +You may opt to use, copy, modify, merge, publish, distribute and/or sell +copies of the Software, and permit persons to whom the Software is furnished +to do so, under the terms of the MPL or the MIT/X-derivate licenses. You may +pick one of these licenses. |