from asyncio. constants import ACCEPT_RETRY_DELAY
from urllib. request import urlopen
# Fetch the page. The response object is file-like; reading it inside a
# ``with`` block guarantees the connection is released afterwards.
with urlopen('http://www.python.org') as webpage:
    text = webpage.read()

# Suppose we want the relative URL of the "About" link on the page we just
# opened; a regular expression can extract it.
import re

# ``text`` is bytes, so the pattern has to be a bytes literal as well.
m = re.search(b'<a href="([^"]+)" .*?>about</a>', text, re.IGNORECASE)
# re.search() returns None when nothing matches, so only read the group on a hit.
about_url = m.group(1) if m is not None else None  # e.g. b'/about/'
# To give the downloaded copy a file name, pass it as the second argument.
# urlretrieve is imported here because the file only imports urlopen.
from urllib.request import urlretrieve
urlretrieve('http://www.python.org', 'C:\\python_webpage.html')
#代码清单14 - 3 基于SocketServer的极简服务器
from socketserver import TCPServer, StreamRequestHandler
class Handler(StreamRequestHandler):
    """Request handler that logs the peer address and sends a greeting."""

    def handle(self):
        """Print the client's address and write a fixed greeting to it."""
        addr = self.request.getpeername()
        print('Got connection from', addr)
        # wfile is a binary stream, so the payload must be bytes, not str.
        self.wfile.write(b'Thank you for connecting')
# Listen on every interface, port 1234. The ``with`` block calls
# server_close() on exit, so the listening socket is released even when
# serve_forever() is interrupted.
with TCPServer(('', 1234), Handler) as server:
    server.serve_forever()
#代码清单14 - 4 分叉服务器
# The mix-in class is spelled ``ForkingMixIn`` (lower-case final "n").
from socketserver import TCPServer, ForkingMixIn, StreamRequestHandler


class Server(ForkingMixIn, TCPServer):
    """TCPServer combined with ForkingMixIn; the mix-in must come first in the bases."""
class Handler(StreamRequestHandler):
    """Request handler that logs the peer address and sends a greeting."""

    def handle(self):
        """Print the client's address and write a fixed greeting to it."""
        addr = self.request.getpeername()
        print('Got connection from', addr)
        # wfile is a binary stream, so the payload must be bytes, not str.
        self.wfile.write(b'Thank you for connecting')
# Listen on every interface, port 1234. The ``with`` block calls
# server_close() on exit, so the listening socket is released even when
# serve_forever() is interrupted.
with Server(('', 1234), Handler) as server:
    server.serve_forever()
#代码清单14 - 5 线程化服务器
# The mix-in class is spelled ``ThreadingMixIn`` (lower-case final "n").
from socketserver import TCPServer, ThreadingMixIn, StreamRequestHandler


class Server(ThreadingMixIn, TCPServer):
    """TCPServer combined with ThreadingMixIn; the mix-in must come first in the bases."""
class Handler(StreamRequestHandler):
    """Request handler that logs the peer address and sends a greeting."""

    def handle(self):
        """Print the client's address and write a fixed greeting to it."""
        addr = self.request.getpeername()
        print('Got connection from', addr)
        # wfile is a binary stream, so the payload must be bytes, not str.
        self.wfile.write(b'Thank you for connecting')
# Listen on every interface, port 1234. The ``with`` block calls
# server_close() on exit, so the listening socket is released even when
# serve_forever() is interrupted.
with Server(('', 1234), Handler) as server:
    server.serve_forever()
#代码清单14 - 6 使用select的简单服务器
import socket, select
# Listening socket bound to this machine's host name on port 1234.
s = socket.socket()
host = socket.gethostname()
port = 1234
s.bind((host, port))
s.listen(5)

# Sockets watched for readability: the listener plus every connected client.
inputs = [s]
while True:
    # Block until at least one watched socket has something to read.
    rs, ws, es = select.select(inputs, [], [])
    for r in rs:
        if r is s:
            # The listener is readable: a new client is waiting to be accepted.
            c, addr = s.accept()
            print('Got connection from', addr)
            inputs.append(c)
            continue

        try:
            data = r.recv(1024)
            # An empty read means the peer closed the connection.
            disconnected = not data
        except socket.error:
            disconnected = True

        if disconnected:
            # After a failed recv() the socket may no longer be connected,
            # in which case getpeername() itself raises.
            try:
                peer = r.getpeername()
            except socket.error:
                peer = 'unknown peer'
            print(peer, 'disconnected')
            inputs.remove(r)
            # Release the file descriptor of the dropped client.
            r.close()
        else:
            print(data)
#代码清单14 - 7 使用poll的简单服务器
import socket, select
s = socket.socket()
host = socket.gethostname()
port = 1234
s.bind((host, port))

# poll() reports bare file descriptors, so keep a map back to the sockets.
fdmap = {s.fileno(): s}

s.listen(5)
p = select.poll()
p.register(s)
while True:
    events = p.poll()
    for fd, event in events:
        # Every registered descriptor is a key of fdmap, so testing
        # ``fd in fdmap`` cannot tell the listener from a client; compare
        # against the listening socket itself instead.
        if fdmap[fd] is s:
            c, addr = s.accept()
            print('Got connection from', addr)
            p.register(c)
            fdmap[c.fileno()] = c
        elif event & select.POLLIN:
            data = fdmap[fd].recv(1024)
            if not data:  # no data -- the connection has been closed
                print(fdmap[fd].getpeername(), 'disconnected')
                p.unregister(fd)
                # Forget the client and release its file descriptor.
                fdmap.pop(fd).close()
            else:
                print(data)
#代码清单14 - 8 使用Twisted创建的简单服务器
from twisted. internet import reactor
from twisted. internet. protocol import Protocol, Factory
class SimpleLogger(Protocol):
    """Protocol that prints connection events and every chunk of received data."""

    def connectionMade(self):
        """Print the client recorded on the transport when a connection opens."""
        print('Got connection from', self.transport.client)

    def connectionLost(self, reason):
        """Print the client recorded on the transport when the connection ends."""
        print(self.transport.client, 'disconnected')

    def dataReceived(self, data):
        """Print the received data as-is."""
        print(data)


# Build a factory that uses SimpleLogger, listen on TCP port 1234 and start
# the reactor.
factory = Factory()
factory.protocol = SimpleLogger
reactor.listenTCP(1234, factory)
reactor.run()
#代码清单14 - 9 使用协议LineReceiver改进后的日志服务器
from twisted. internet import reactor
from twisted. internet. protocol import Factory
from twisted. protocols. basic import LineReceiver
class SimpleLogger(LineReceiver):
    """LineReceiver-based protocol that prints connection events and each received line."""

    def connectionMade(self):
        """Print the client recorded on the transport when a connection opens."""
        print('Got connection from', self.transport.client)

    def connectionLost(self, reason):
        """Print the client recorded on the transport when the connection ends."""
        print(self.transport.client, 'disconnected')

    def lineReceived(self, line):
        """Print one received line."""
        print(line)


# Build a factory that uses SimpleLogger, listen on TCP port 1234 and start
# the reactor.
factory = Factory()
factory.protocol = SimpleLogger
reactor.listenTCP(1234, factory)
reactor.run()
#代码清单15 - 1 简单的屏幕抓取程序
from urllib. request import urlopen
import re
# Group 1 captures the job URL without its trailing slash, group 2 the link
# text. The stray spaces that had crept into the pattern are removed: with
# them the expression could not match any real anchor tag.
p = re.compile(r'<a href="(/jobs/\d+)/">(.*?)</a>')
with urlopen('http://python.org/jobs') as response:
    text = response.read().decode()
for url, name in p.findall(text):
    print('{} ({})'.format(name, url))
'''
<h1>Pet Shop
<h2>Complaints</h3>
<p>There is <b>no <i>way</b> at all</i> we can accept returned parrots.
<h1><i>Dead Pets</h1>
<p>Our pets may tend to rest at times, but rarely die within the warranty period.
<i><h2>News</h2></i>
<p>We have just received <b>a really nice parrot.
<p>It's really nice.</b>
<h3><hr>The Norwegian Blue</h3>
<h4>Plumage and <hr>pining behavior</h4>
<a href="#norwegian-blue">More information<a>
<p>Features:
<body>
<li>Beautiful plumage
'''
#下面是Tidy修复后的版本:
'''
<!DOCTYPE html>
<html>
<head>
<title></title>
</head>
<body>
<h1>Pet Shop</h1>
<h2>Complaints</h2>
<p>There is <b>no <i>way</i></b> <i>at all</i> we can accept returned parrots.</p>
<h1><i>Dead Pets</i></h1>
<p><i>Our pets may tend to rest at times, but rarely die within the warranty period.</i></p>
<h2><i>News</i></h2>
<p>We have just received <b>a really nice parrot.</b></p>
<p><b>It's really nice.</b></p>
<hr>
<h3>The Norwegian Blue</h3>
<h4>Plumage and</h4>
<hr>
<h4>pining behavior</h4>
<a href="#norwegian-blue">More information</a>
<p>Features:</p>
<ul>
<li>Beautiful plumage</li>
</ul>
</body>
</html>
'''
#例如,假设你有一个混乱的HTML文件(messy. html),且在执行路径中包含命令行版Tidy,下面的程序将对这个文件运行Tidy并将结果打印出来:
from subprocess import Popen, PIPE
# Read the messy document, closing the file as soon as it has been read.
with open('messy.html') as source:
    text = source.read()

tidy = Popen('tidy', stdin=PIPE, stdout=PIPE, stderr=PIPE)

# communicate() feeds stdin and drains stdout/stderr concurrently, then waits
# for the process. Writing all of stdin before reading any output can
# deadlock once a pipe buffer fills up.
output, _ = tidy.communicate(text.encode())
print(output.decode())
#代码清单15 - 2 使用模块HTMLParser的屏幕抓取程序
from urllib. request import urlopen
from html. parser import HTMLParser
def isjob(url):
    """Return True if *url* has the form ``/jobs/<digits>/``.

    The URL must split on ``/`` into exactly four parts: an empty string,
    ``jobs``, a run of digits, and another empty string.
    """
    parts = url.split('/')
    if len(parts) != 4:
        return False
    leading, section, job_id, trailing = parts
    return (
        leading == ''
        and trailing == ''
        and section == 'jobs'
        and job_id.isdigit()
    )
class Scraper(HTMLParser):
    """HTML parser that prints the text and URL of every job link it is fed."""

    # True while the parser is between an accepted <a ...> tag and its </a>.
    in_link = False

    def handle_starttag(self, tag, attrs):
        """Start collecting text when an <a> tag whose href passes isjob() opens."""
        attrs = dict(attrs)
        # A valueless attribute (``<a href>``) is reported as None; fall back
        # to '' so that isjob() always receives a string.
        url = attrs.get('href') or ''
        if tag == 'a' and isjob(url):
            self.url = url
            self.in_link = True
            self.chunks = []

    def handle_data(self, data):
        """Accumulate text that appears inside an accepted link."""
        if self.in_link:
            self.chunks.append(data)

    def handle_endtag(self, tag):
        """Print ``<text> (<url>)`` when an accepted link closes."""
        if tag == 'a' and self.in_link:
            print('{} ({})'.format(''.join(self.chunks), self.url))
            self.in_link = False
# Download the jobs page, closing the HTTP response once it has been read.
with urlopen('http://python.org/jobs') as response:
    text = response.read().decode()

# Feed the whole document to the scraper; close() flushes any buffered input.
parser = Scraper()
parser.feed(text)
parser.close()
#代码清单15 - 3 使用Beautiful Soup的屏幕抓取程序
from urllib. request import urlopen
from bs4 import BeautifulSoup
# Download the jobs page, closing the HTTP response once it has been read.
with urlopen('http://python.org/jobs') as response:
    text = response.read()
soup = BeautifulSoup(text, 'html.parser')

# Collect "<link text> (<href>)" for each h2 under the first <section> of
# the body; a set removes duplicates.
jobs = set()
for job in soup.body.section('h2'):
    jobs.add('{} ({})'.format(job.a.string, job.a['href']))

# Print the entries sorted case-insensitively, one per line.
print('\n'.join(sorted(jobs, key=str.lower)))
#代码清单15 - 4 简单的CGI脚本
#!/usr/bin/env python

# Emit the header line, an empty line that terminates the header section,
# and then the body -- one line each.
print('Content-type:text/plain', '', 'Hello,world!', sep='\n')
#代码清单15 - 5 显示栈跟踪的CGI脚本(faulty. cgi)
#!/usr/bin/env python
# Enable cgitb before anything else runs, so that the exception raised
# below is handled by it.
import cgitb; cgitb. enable ( )
print ( 'Content-type:text/html\n' )
# 1 / 0 raises ZeroDivisionError -- presumably on purpose, given that the
# listing is named faulty.cgi; the statement after it is never reached.
print ( 1 / 0 )
print ( 'Hello,world!' )
#代码清单15 - 6 从FieldStorage中获取单个值的CGI脚本(simple2. cgi)
#!/usr/bin/env python
import cgi

# ``from`` is a reserved word and cannot be assigned to; the variable is
# ``form``, which is also the name the next line reads.
form = cgi.FieldStorage()
# Fall back to 'world' when no ``name`` parameter was submitted.
name = form.getvalue('name', 'world')

print('Content-type:text/plain\n')
print('Hello,{}!'.format(name))
#代码清单15 - 7 包含HTML表单的问候脚本(simple3. cgi)
#!/usr/bin/env python
import cgi
import html

form = cgi.FieldStorage()
# Fall back to 'world' when no ``name`` parameter was submitted.
name = form.getvalue('name', 'world')

# The header must be followed by an empty line before the body starts.
# ``name`` comes straight from the request, so it is HTML-escaped before
# being placed in the page.
print("""Content-type: text/html

<html>
  <head>
    <title>Greeting Page</title>
  </head>
  <body>
    <h1>Hello, {}!</h1>
    <form action='simple3.cgi'>
    Change name <input type='text' name='name' />
    <input type='submit' />
    </form>
  </body>
</html>
""".format(html.escape(str(name))))
'''
在这个脚本开头,与以前一样获取CGI参数name,并将默认值设置为'world'。如果在浏览器
中打开这个脚本时没有提交任何值,将使用默认值。
接下来,打印了一个简单的HTML页面,其中的标题包含参数name的值。另外,这个页面还
包含一个HTML表单,该表单的属性action被设置为脚本的名称(simple3.cgi)。这意味着提交表
单后,将再次运行这个脚本。这个表单只包含一个输入元素——名为name的文本框。因此,如果
你在文本框中输入新名字并提交表单,标题将发生变化,因为现在参数name包含值。
'''
# https :
# JSON,http:
#代码清单16 - 1 简单的测试程序
from area import rect_area
# Check rect_area() against one known-good result.
height = 3
width = 4
correct_answer = 12
answer = rect_area(height, width)
print('Test passed' if answer == correct_answer else 'Test failed')
#代码清单16 - 3 使用模块subprocess调用外部检查器
import unittest, my_math
from subprocess import Popen, PIPE
class ProductTestCase(unittest.TestCase):
    """Test case that runs external source checkers against my_math."""

    # Insert the earlier tests here.

    def test_with_PyChecker(self):
        """Run pychecker on the module's source file and expect no output."""
        # rstrip('c') turns a ``.pyc`` path into the matching ``.py`` path.
        cmd = 'pychecker', '-Q', my_math.__file__.rstrip('c')
        pychecker = Popen(cmd, stdout=PIPE, stderr=PIPE)
        # communicate() drains both pipes and waits for the process to exit.
        output, _ = pychecker.communicate()
        # Pipe output is bytes, so compare against an empty bytes object.
        self.assertEqual(output, b'')

    def test_with_PyLint(self):
        """Run pylint on my_math and expect no output."""
        cmd = 'pylint', '-rn', 'my_math'
        pylint = Popen(cmd, stdout=PIPE, stderr=PIPE)
        output, _ = pylint.communicate()
        self.assertEqual(output, b'')


if __name__ == '__main__':
    unittest.main()
#代码清单17 - 1 一个简单的Java类(JythonTest. java)
/** Minimal class exposing a single method that prints a greeting. */
public class JythonTest {

    /** Writes a fixed greeting to standard output. */
    public void greeting() {
        final String message = "Hello,world!";
        System.out.println(message);
    }
}
#$ javac JythonTest.java
#代码清单17 - 2 一个简单的C#类(IronPythonTest. cs)
using System;
namespace FePyTest
{
    /// <summary>Minimal class exposing a single method that prints a greeting.</summary>
    public class IronPythonTest
    {
        /// <summary>Writes a fixed greeting to the console.</summary>
        public void greeting()
        {
            string message = "Hello,world";
            Console.WriteLine(message);
        }
    }
}
#对于Microsoft .NET,命令如下:
# csc.exe /t:library IronPythonTest.cs
#《C语言入门经典(第5 版)》 《C程序设计语言(第2 版)》
# NumPy(http:
#一个简单的检测回文的C语言函数(palindrome. c)
# include <string.h>
/*
 * Return 1 if text reads the same forwards and backwards, 0 otherwise.
 * text must be a NUL-terminated string; the empty string is a palindrome.
 */
int is_palindrome(char *text) {
    size_t i, n = strlen(text);
    /*
     * Comparing the first half with the mirrored second half is enough.
     * The bound is strict: with "<=" an empty string (n == 0) would run one
     * iteration and read text[-1].
     */
    for (i = 0; i < n / 2; ++i) {
        if (text[i] != text[n - i - 1]) return 0;
    }
    return 1;
}
#代码清单17 - 4 检测回文的Python函数
def is_palindrome(text):
    """Return True if *text* reads the same forwards and backwards.

    Works on any sequence that supports ``len()`` and indexing. An empty
    sequence is a palindrome.
    """
    n = len(text)
    # Only the first half needs checking against its mirror position.
    return all(text[i] == text[n - i - 1] for i in range(n // 2))
#代码清单17 - 5 回文检测库的接口(palindrome. i)
/* SWIG interface for the palindrome library: exposes is_palindrome(). */
%module palindrome

%{
#include <string.h>
%}

extern int is_palindrome(char *text);
#下面是一个在Solaris系统中使用编译器cc的示例(这里假设$PYTHON_HOME指向Python安装目录):
# cc -c palindrome.c
# cc -I$PYTHON_HOME -I$PYTHON_HOME/Include -c palindrome_wrap.c
# cc -G palindrome.o palindrome_wrap.o -o _palindrome.so