Python抓取JSON网页内容

# -*- coding: UTF-8 -*-
'''
Created on 20150206

@author: Hansen
'''

import urllib2
import sys
import io
import json

#Fetch HTML from URL
def fecth_html(index,url,keepHtml,resultFile):
    req = urllib2.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')
    rsp = urllib2.urlopen(req)
    content = rsp.read()
    #receive_header = rsp.info()
    #print(sys.getfilesystemencoding())
    #content = content.decode('utf-8','replace')
    
    if keepHtml:
        fileinfo = open(str(index)+'.html','w')
        fileinfo.write(content)
        print("save file "+ str(index)+'.html:    ok')
        
    parse_json(content,resultFile)
    
#Parse HTML
def parse_json(content,resultFile):
    jsonData = json.loads(content)
    shops = jsonData['shopBeans']
    print(len(shops))
    for shop in shops:
        szTitle = shop['filterFullName']
        szTitle = szTitle.replace("\r\n", "-").replace(" ","");
        szStar = shop['shopPowerTitle']
        szMeanPrice = str(shop['avgPrice'])
        szMeanPrice = szMeanPrice.replace("\n", "").replace(" ","");
        szAddressA = shop['mainRegionName']
        szAddressB = shop['address']
        szAddress = (szAddressA+"-"+szAddressB).replace("\r\n", "-").replace(" ","");
        szTaste = shop['refinedScore1']
        szEvn = shop['refinedScore2']
        szService = shop['refinedScore3']
        
        fileinfo = io.open(resultFile,'a',encoding='utf_16')
        fileinfo.write(szTitle+","+szStar+","+szMeanPrice+","+szAddress+"," +szTaste+","+szEvn+","+szService+"\n")

第一次拆iPhone4S

这绝对是no zuo no die的典型事迹啊。

iPhone4S美版,IOS5.1.1,卡贴解锁,已越狱。美中不足,锁屏键坏掉了。
我当然不能妥协了,于是淘宝上买了个开机排线(感光排线),一个城市三天才到。

晚上下班后,开始了拆机之旅,拆到一半发现没带镊子,于是回家搞了个镊子。
回来继续拆,iPhone 4S比iPhone4难拆一些,尤其是前置摄像头保护罩,用了一刻钟才弄下来。
全部拆掉后,换掉排线,开始组装。

感光排线有些难弄,但还好啦。
主板上好,弄右上角的wifi排线,插好了,发现排线位置不对,与是撬棒弄下来准备再插一下。
靠。。。。。。
不小心把主板上的wifi排线插座弄掉了,整个脱焊了,吐血啊。。。
怀着复杂的心情,将手机装好,发现果真wifi时断时续,基本没法用了。
心情异常沉重,距离成功只有一步之遥啊。。。
查了一下,要重新焊上才行。
虽然能借到电烙铁,但以前只弄过很简单的焊点,这个还是不要自己弄了吧。

今天,跑了一趟电脑城,遇到了一个靠谱的师傅,十几分钟就给我搞定了。
当然,师傅对我可以把这个弄掉表示无法理解,另外鄙视我“和钱过不去”。
好吧,我承认,25块能搞定的东西,我最后花了175。
吐血啊。。。

还好地方是,我自己换上的地方,全都好用哦,哈哈哈哈哈哈哈。
下次继续自己拆。

另外,回来的时候,出租车师傅教导我:
1、心态要好,别瞎着急,着急解决不了问题
2、与人沟通的时候,要注意沟通方式和预期
第一次感觉打车赚到了。。。

CSharp抓取JSON网页内容

    using Newtonsoft.Json;
    using DaZhongDianPing.JsonBeans;

    class JsonCrawler
    {
        private PhaseResultBean PhaseJson(Uri uri, String szResultPath, String szErrorPath)
        {
            PhaseResultBean result = new PhaseResultBean();

            try
            {
                //取回网页
                WebClient client = new WebClient();
                client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");
                Byte[] pageData = client.DownloadData(uri);
                string pageHtml = Encoding.UTF8.GetString(pageData);
                JsonBeans.TopShopsBean topShops = JsonConvert.DeserializeObject<JsonBeans.TopShopsBean>(pageHtml);

                //分析Json
                int len = topShops.ShopBeans.Length;
                result.total = len;

                foreach (ShopBean shop in topShops.ShopBeans)
                {
                    try
                    {
                        String szTitle = shop.FilterFullName;
                        if (szTitle != null) szTitle = szTitle.Replace("\r\n", "-");
                        String szStar = shop.ShopPowerTitle;
                        String szMeanPrice = shop.AvgPrice.ToString();
                        String szRegionName = shop.MainRegionName;
                        String szAddress = shop.Address;
                        if (szAddress != null) szAddress.Replace(",", "-");

                        String szTaste = shop.RefinedScore1;
                        String szEvn = shop.RefinedScore2;
                        String szService = shop.RefinedScore3;

                        //将获取的内容写入文本
                        using (StreamWriter sw = new StreamWriter(szResultPath, true))
                        {
                            sw.WriteLine(szTitle + "," + szStar + "," + szMeanPrice + "," + szRegionName + "," + szAddress + "," + szTaste + "," + szEvn + "," + szService);
                        }

                        result.successed += 1;
                    }
                    catch (Exception Ex)
                    {
                        using (StreamWriter sw = new StreamWriter(szErrorPath, true))
                        {
                            sw.WriteLine(Ex.Message);
                        } 
                        result.failed += 1;
                    }
                }
            }
            catch (WebException webEx)
            {
                using (StreamWriter sw = new StreamWriter(szErrorPath, true))
                {
                    sw.WriteLine(webEx.Message);
                }
                result.bSuccess = false;
            }

            return result;
        }
    }

    class PhaseResultBean
    {
        public Boolean bSuccess;
        public int total;
        public int successed;
        public int failed;
    }

    public enum JsonEnginType
    {
        JsonEngin_Newtonsoft
    }

    internal class ShopBean
    {

        [JsonProperty("addDate")]
        public string AddDate { get; set; }

        [JsonProperty("addUser")]
        public object AddUser { get; set; }

        [JsonProperty("addUserName")]
        public object AddUserName { get; set; }

        [JsonProperty("address")]
        public string Address { get; set; }

        [JsonProperty("altName")]
        public string AltName { get; set; }

        [JsonProperty("avgPrice")]
        public int AvgPrice { get; set; }

        [JsonProperty("branchName")]
        public string BranchName { get; set; }

        [JsonProperty("branchTotal")]
        public int BranchTotal { get; set; }

        [JsonProperty("businessHours")]
        public string BusinessHours { get; set; }

        [JsonProperty("canSendSms")]
        public object CanSendSms { get; set; }

        [JsonProperty("categoryId")]
        public int CategoryId { get; set; }

        [JsonProperty("cityId")]
        public int CityId { get; set; }

        [JsonProperty("crossRoad")]
        public string CrossRoad { get; set; }

        [JsonProperty("defaultPic")]
        public string DefaultPic { get; set; }

        [JsonProperty("defaultPicBig")]
        public object DefaultPicBig { get; set; }

        [JsonProperty("dishTagList")]
        public string[][] DishTagList { get; set; }

        [JsonProperty("dishTags")]
        public string DishTags { get; set; }

        [JsonProperty("district")]
        public int District { get; set; }

        [JsonProperty("districtName")]
        public object DistrictName { get; set; }

        [JsonProperty("filterFullAdress")]
        public string FilterFullAdress { get; set; }

        [JsonProperty("filterFullName")]
        public string FilterFullName { get; set; }

        [JsonProperty("firstReviewId")]
        public int FirstReviewId { get; set; }

        [JsonProperty("firstUserFace")]
        public object FirstUserFace { get; set; }

        [JsonProperty("firstUserNickName")]
        public object FirstUserNickName { get; set; }

        [JsonProperty("fullAdress")]
        public string FullAdress { get; set; }

        [JsonProperty("fullName")]
        public string FullName { get; set; }

        [JsonProperty("glat")]
        public object Glat { get; set; }

        [JsonProperty("glng")]
        public object Glng { get; set; }

        [JsonProperty("groupFlag")]
        public object GroupFlag { get; set; }

        [JsonProperty("hasStaticMap")]
        public object HasStaticMap { get; set; }

        [JsonProperty("hits")]
        public int Hits { get; set; }

        [JsonProperty("isUserCanUpdate")]
        public object IsUserCanUpdate { get; set; }

        [JsonProperty("lastDate")]
        public string LastDate { get; set; }

        [JsonProperty("lastIp")]
        public object LastIp { get; set; }

        [JsonProperty("lastUser")]
        public object LastUser { get; set; }

        [JsonProperty("lastUserName")]
        public object LastUserName { get; set; }

        [JsonProperty("mainCategoryId")]
        public int MainCategoryId { get; set; }

        [JsonProperty("mainCategoryName")]
        public object MainCategoryName { get; set; }

        [JsonProperty("mainRegionId")]
        public int MainRegionId { get; set; }

        [JsonProperty("mainRegionName")]
        public string MainRegionName { get; set; }

        [JsonProperty("minUserMana")]
        public object MinUserMana { get; set; }

        [JsonProperty("monthlyHits")]
        public int MonthlyHits { get; set; }

        [JsonProperty("nearByTags")]
        public object NearByTags { get; set; }

        [JsonProperty("nearbyShops")]
        public object NearbyShops { get; set; }

        [JsonProperty("oldChainId")]
        public object OldChainId { get; set; }

        [JsonProperty("phoneNo")]
        public string PhoneNo { get; set; }

        [JsonProperty("phoneNo2")]
        public string PhoneNo2 { get; set; }

        [JsonProperty("picTotal")]
        public int PicTotal { get; set; }

        [JsonProperty("popularity")]
        public int Popularity { get; set; }

        [JsonProperty("power")]
        public int Power { get; set; }

        [JsonProperty("prevWeeklyHits")]
        public object PrevWeeklyHits { get; set; }

        [JsonProperty("priceInfo")]
        public object PriceInfo { get; set; }

        [JsonProperty("priceLevel")]
        public int PriceLevel { get; set; }

        [JsonProperty("primaryTag")]
        public string PrimaryTag { get; set; }

        [JsonProperty("promoId")]
        public int PromoId { get; set; }

        [JsonProperty("publicTransit")]
        public string PublicTransit { get; set; }

        [JsonProperty("refinedScore1")]
        public string RefinedScore1 { get; set; }

        [JsonProperty("refinedScore2")]
        public string RefinedScore2 { get; set; }

        [JsonProperty("refinedScore3")]
        public string RefinedScore3 { get; set; }

        [JsonProperty("regionId")]
        public int RegionId { get; set; }

        [JsonProperty("score")]
        public int Score { get; set; }

        [JsonProperty("score1")]
        public int Score1 { get; set; }

        [JsonProperty("score2")]
        public int Score2 { get; set; }

        [JsonProperty("score3")]
        public int Score3 { get; set; }

        [JsonProperty("score4")]
        public int Score4 { get; set; }

        [JsonProperty("searchKeyWord")]
        public object SearchKeyWord { get; set; }

        [JsonProperty("searchName")]
        public object SearchName { get; set; }

        [JsonProperty("shopGroupId")]
        public int ShopGroupId { get; set; }

        [JsonProperty("shopId")]
        public int ShopId { get; set; }

        [JsonProperty("shopName")]
        public string ShopName { get; set; }

        [JsonProperty("shopPower")]
        public int ShopPower { get; set; }

        [JsonProperty("shopPowerTitle")]
        public string ShopPowerTitle { get; set; }

        [JsonProperty("shopTagList")]
        public string[][] ShopTagList { get; set; }

        [JsonProperty("shopTags")]
        public string ShopTags { get; set; }

        [JsonProperty("shopTotalName")]
        public string ShopTotalName { get; set; }

        [JsonProperty("shopType")]
        public int ShopType { get; set; }

        [JsonProperty("similarShops")]
        public object SimilarShops { get; set; }

        [JsonProperty("suggestGA")]
        public object SuggestGA { get; set; }

        [JsonProperty("suggestReason")]
        public object SuggestReason { get; set; }

        [JsonProperty("todayHits")]
        public object TodayHits { get; set; }

        [JsonProperty("voteTotal")]
        public int VoteTotal { get; set; }

        [JsonProperty("webSite")]
        public object WebSite { get; set; }

        [JsonProperty("weeklyHits")]
        public int WeeklyHits { get; set; }

        [JsonProperty("wishTotal")]
        public object WishTotal { get; set; }

        [JsonProperty("writeUp")]
        public string WriteUp { get; set; }
    }

    internal class TopShopsBean
    {

        [JsonProperty("categoryId")]
        public int CategoryId { get; set; }

        [JsonProperty("cityId")]
        public int CityId { get; set; }

        [JsonProperty("maxResults")]
        public int MaxResults { get; set; }

        [JsonProperty("rankType")]
        public int RankType { get; set; }

        [JsonProperty("shopBeans")]
        public ShopBean[] ShopBeans { get; set; }

        [JsonProperty("shopType")]
        public int ShopType { get; set; }

        [JsonProperty("skipResults")]
        public int SkipResults { get; set; }
    }

CSharp抓取HTML网页内容

    using mshtml;
    using HtmlAgilityPack;

    class HTMLCrawler
    {
        private PhaseResultBean PhaseHtml(int index, Uri uri, String szResultPath, String szErrorPath, HTMLEnginType htmlEngin)
        {
            PhaseResultBean result = new PhaseResultBean();
            try
            {
                WebClient client = new WebClient();
                client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");
                Byte[] pageData = client.DownloadData(uri);
                string pageHtml = Encoding.UTF8.GetString(pageData);
                if (checkSavePages.Checked)
                {
                    String szHtmlPath = XWin32.getExeParentPath() + index.ToString()+".html";
                    using (StreamWriter sw = new StreamWriter(szHtmlPath, true))
                    {
                        sw.WriteLine(pageHtml);
                    }
                }

                switch(htmlEngin)
                {
                    case HTMLEnginType.HTMLEngin_mshtml:
                        PhaseHtml_mshtml(pageHtml, szResultPath, szErrorPath, result);
                        break;
                    case HTMLEnginType.HTMLEngin_HtmlAgilityPack:
                        PhaseHtml_HtmlAgilityPack(pageHtml, szResultPath, szErrorPath, result);
                        break;
                }
            }
            catch (WebException webEx)
            {
                using (StreamWriter sw = new StreamWriter(szErrorPath, true))
                {
                    sw.WriteLine(webEx.Message);
                }
                result.bSuccess = false;
            }

            return result;
        }

        private void PhaseHtml_mshtml(String pageHtml, String szResultPath, String szErrorPath, PhaseResultBean result)
        {
            mshtml.HTMLDocument docObject = new mshtml.HTMLDocument();
            mshtml.IHTMLDocument2 doc2 = docObject as mshtml.IHTMLDocument2;
            doc2.write(pageHtml);
            doc2.close();

            mshtml.IHTMLDocument3 doc3 = docObject as mshtml.IHTMLDocument3;

            int len = doc3.getElementById("shop-all-list").children[0].children.length;
            result.total += len;

            foreach (IHTMLElement li in doc3.getElementById("shop-all-list").children[0].children)
            {
                try
                {
                    IHTMLElement title = li.children[1].children[0];
                    String szTitle = title.innerText;
                    if (szTitle != null) szTitle = szTitle.Replace("\r\n", "-");
                    IHTMLElement star = li.children[1].children[1].children[0];
                    String szStar = star.getAttribute("title");
                    IHTMLElement reviewNum = li.children[1].children[1].children[1];
                    String szReviewNum = reviewNum.innerText;
                    IHTMLElement meanPrice = li.children[1].children[1].children[3];
                    String szMeanPrice = meanPrice.innerText;
                    IHTMLElement category = li.children[1].children[2].children[0];
                    String szCategory = category.innerText;
                    IHTMLElement address = li.children[1].children[2].children[3];
                    String szAddress = address.innerText;
                    if (szAddress != null) szAddress.Replace(",", "-");

                    IHTMLElement taste = li.children[1].children[3].children[0];
                    String szTaste = taste.innerText;
                    IHTMLElement evn = li.children[1].children[3].children[1];
                    String szEvn = evn.innerText;
                    IHTMLElement service = li.children[1].children[3].children[2];
                    String szService = service.innerText;

                    //将获取的内容写入文本
                    using (StreamWriter sw = new StreamWriter(szResultPath, true))
                    {
                        sw.WriteLine(szTitle + "," + szStar + "," + szReviewNum + "," + szMeanPrice + "," + szCategory + "," + szAddress + "," + szTaste + "," + szEvn + "," + szService);
                    }
                }
                catch (Exception Ex)
                {
                    using (StreamWriter sw = new StreamWriter(szErrorPath, true))
                    {
                        sw.WriteLine(Ex.Message);
                    }

                    result.failed += 1;
                }

            }
        }

        private void PhaseHtml_HtmlAgilityPack(String pageHtml, String szResultPath, String szErrorPath, PhaseResultBean result)
        {
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(pageHtml);

            HtmlAgilityPack.HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("/html[1]/body[1]/div[4]/div[3]/div[1]/div[1]/div[2]/ul[1]/li");
            result.total += nodes.Count;

            foreach (HtmlAgilityPack.HtmlNode li in nodes)
            {
                try
                {
                    HtmlAgilityPack.HtmlNode titleA = li.SelectNodes("div[2]/div[1]/a[1]")[0];
                    HtmlAgilityPack.HtmlNode titleB = li.SelectNodes("div[2]/div[1]/a[2]") == null ? null : li.SelectNodes("div[2]/div[1]/a[2]")[0];
                    String szTitle = (titleA==null?"":titleA.InnerText) + "-" + (titleB == null ? "" : titleB.InnerText);
                    if (szTitle != null) szTitle = szTitle.Replace("\n", "");
                    if (szTitle != null) szTitle = szTitle.Replace(" ", "");

                    HtmlAgilityPack.HtmlNode star = li.SelectNodes("div[2]/div[2]/span[1]")[0];
                    String szStar = star.Attributes["title"].Value.ToString();

                    HtmlAgilityPack.HtmlNode reviewNum = li.SelectNodes("div[2]/div[2]/a[1]")[0];
                    String szReviewNum = reviewNum.InnerText;
                    if (szReviewNum != null) szReviewNum = szReviewNum.Replace("\n", "");
                    if (szReviewNum != null) szReviewNum = szReviewNum.Replace(" ", "");

                    HtmlAgilityPack.HtmlNode meanPrice = li.SelectNodes("div[2]/div[2]/a[2]")[0];
                    String szMeanPrice = meanPrice.InnerText;
                    if (szMeanPrice != null) szMeanPrice = szMeanPrice.Replace("\n", "");
                    if (szMeanPrice != null) szMeanPrice = szMeanPrice.Replace(" ", "");

                    HtmlAgilityPack.HtmlNode category = li.SelectNodes("div[2]/div[3]/a[1]")[0];
                    String szCategory = category.InnerText;

                    HtmlAgilityPack.HtmlNode addressA = li.SelectNodes("div[2]/div[3]/a[2]")[0];
                    HtmlAgilityPack.HtmlNode addressB = li.SelectNodes("div[2]/div[3]/span[1]")[0];
                    String szAddress = addressA.InnerText + "-" + addressB.InnerText;
                    if (szAddress != null) szAddress.Replace(",", "-");

                    HtmlAgilityPack.HtmlNode taste = li.SelectNodes("div[2]/span[1]/span[1]")[0];
                    String szTaste = taste.InnerText;
                    HtmlAgilityPack.HtmlNode evn = li.SelectNodes("div[2]/span[1]/span[2]")[0];
                    String szEvn = evn.InnerText;
                    HtmlAgilityPack.HtmlNode service = li.SelectNodes("div[2]/span[1]/span[3]")[0];
                    String szService = service.InnerText;

                    //将获取的内容写入文本
                    using (StreamWriter sw = new StreamWriter(szResultPath, true))
                    {
                        sw.WriteLine(szTitle + "," + szStar + "," + szReviewNum + "," + szMeanPrice + "," + szCategory + "," + szAddress + "," + szTaste + "," + szEvn + "," + szService);
                    }
                }
                catch (Exception Ex)
                {
                    using (StreamWriter sw = new StreamWriter(szErrorPath, true))
                    {
                        sw.WriteLine(Ex.Message);
                    }

                    result.failed += 1;
                }

            }
        }
    }

    class PhaseResultBean
    {
        public Boolean bSuccess;
        public int total;
        public int successed;
        public int failed;
    }

    public enum HTMLEnginType
    {
        HTMLEngin_mshtml,
        HTMLEngin_HtmlAgilityPack
    }

Python抓取网页内容

1、BeautifulSoup解析网页

'''
Created on 20150203
@author: Hansen
'''

import urllib2
import sys
import io
from bs4 import BeautifulSoup

#Fetch HTML from URL
def fecth_html(index,url,keepHtml,resultFile):
    req = urllib2.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')
    rsp = urllib2.urlopen(req)
    content = rsp.read()
    #receive_header = rsp.info()
    #print(sys.getfilesystemencoding())
    #content = content.decode('utf-8','replace')
    
    if keepHtml:
        fileinfo = open(str(index)+'.html','w')
        fileinfo.write(content)
        print("save file "+ str(index)+'.html:    ok')
        
    parse_html(content,resultFile)
    
#Parse HTML
def parse_html(html,resultFile):
    soup = BeautifulSoup(html,fromEncoding="utf8")
    lis = soup.select('div.shop-all-list li')
    print(len(lis))
    for li in lis:
        szTitle = (li.select('div:nth-of-type(2) div:nth-of-type(1) a h4'))[0].get_text()
        szTitle = szTitle.replace("\r\n", "-").replace(" ","");
        szStar = (li.select('div:nth-of-type(2) div:nth-of-type(3) span'))[0]['title']
        szReviewNum = (li.select('div:nth-of-type(2) div:nth-of-type(3) a:nth-of-type(1)'))[0].get_text()
        szReviewNum = szReviewNum.replace("\n", "").replace(" ","");
        szMeanPrice = (li.select('div:nth-of-type(2) div:nth-of-type(3) a:nth-of-type(2)'))[0].get_text()
        szMeanPrice = szMeanPrice.replace("\n", "").replace(" ","");
        szCategory = (li.select('div:nth-of-type(2) div:nth-of-type(4) a:nth-of-type(1)'))[0].get_text()
        szAddressA = (li.select('div:nth-of-type(2) div:nth-of-type(4) a:nth-of-type(2)'))[0].get_text()
        szAddressB = (li.select('div:nth-of-type(2) div:nth-of-type(4) span:nth-of-type(3)'))[0].get_text()
        szAddress = (szAddressA+"-"+szAddressB).replace("\r\n", "-").replace(" ","");
        szTaste = (li.select('div:nth-of-type(2) span:nth-of-type(5) span:nth-of-type(1)'))[0].get_text()
        szEvn = (li.select('div:nth-of-type(2) span:nth-of-type(5) span:nth-of-type(2)'))[0].get_text()
        szService = (li.select('div:nth-of-type(2) span:nth-of-type(5) span:nth-of-type(3)'))[0].get_text()
        
        fileinfo = io.open(resultFile,'a',encoding='utf_16')
        fileinfo.write(szTitle+","+szStar+","+szReviewNum+","+szMeanPrice+","+szCategory+"," +szAddress+","+szTaste+","+szEvn+","+szService+"\n")

2、PyQuery解析网页

'''
Created on 20150203
@author: Hansen
'''

import urllib2
import sys
import io
from pyquery import PyQuery

#Fetch HTML from URL
def fecth_html(index,url,keepHtml,resultFile):
    req = urllib2.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')
    rsp = urllib2.urlopen(req)
    content = rsp.read()
    #receive_header = rsp.info()
    #print(sys.getfilesystemencoding())
    #content = content.decode('utf-8','replace')
    
    if keepHtml:
        fileinfo = open(str(index)+'.html','w')
        fileinfo.write(content)
        print("save file "+ str(index)+'.html:    ok')
        
    parse_html(content,resultFile)
    
#Parse HTML
def parse_html(html,resultFile):
    doc = PyQuery(html)
    lis = doc('div.shop-all-list li')
    print(len(lis))
    for li in lis:
        li_doc = PyQuery(li)
        szTitle = li_doc('li div div a h4').text()
        szTitle = szTitle.replace("\r\n", "-").replace(" ","");
        szStar = li_doc("li div div span").filter('.sml-rank-stars').attr('title')
        szReviewNum = li_doc('li div div a').filter('.review-num').text()
        szReviewNum = szReviewNum.replace("\n", "").replace(" ","");
        szMeanPrice = li_doc('li div div a').filter('.mean-price').text()
        szMeanPrice = szMeanPrice.replace("\n", "").replace(" ","");
        szCategory = li_doc('li div div a span').filter('.tag').eq(1).text()
        szAddressA = li_doc('li div div a span').filter('.tag').eq(1).text()
        szAddressB = li_doc('li div div span').filter('.addr').eq(0).text()
        szAddress = (szAddressA+"-"+szAddressB).replace("\r\n", "-").replace(" ","");
        szTaste = li_doc('li div span span').eq(0).text()
        szEvn = li_doc('li div span span').eq(1).text()
        szService = li_doc('li div span span').eq(2).text()
        
        fileinfo = io.open(resultFile,'a',encoding='utf_16')
        fileinfo.write(szTitle+","+szStar+","+szReviewNum+","+szMeanPrice+","+szCategory+"," +szAddress+","+szTaste+","+szEvn+","+szService+"\n")

从WIN ADT复制到MAC ADT

Android SDK超级大(我现在有26.5G),而连接Google的速度超级慢,一次下载要好久。

最近准备在MAC进行Android开发,别的都好下载,但Adroid SDK就有些太大了,于是考虑从WIN下直接复制到MAC。

Android SDK主要由下面几部分组成:

/add-ons:GoogleAPI,操作系统无关
/build-tools:构建工具,这个和操作系统相关的,要重新下载
/docs:文档,操作系统无关
/extras:扩展内容,是否需要重新下载与其功能有关
/extras/android:操作系统无关
/extras/google:操作系统无关
/extras/intel:intel的硬件加速驱动,操作系统相关,需重新下载
/platforms:android平台,早期操作系统相关,后期将操作系统相关内容放到了build-tools中,可以全部拷贝过去,系统相关内容会自动重新下载
/platform-tools:管理工具,这个和操作系统相关的,要重新下载
/samples:demo,操作系统无关
/sources:android源码,操作系统无关
/system-images:android系统映像,操作系统无关
/temp:下载缓存,不需要拷贝
/tools:管理工具,这个和操作系统相关的,要重新下载

MAC设置AndroidStudio初始环境

1、从Oracle下载MAC版本的JDK7

http://www.oracle.com/technetwork/java/javase/downloads/index.html

2、从Google下载MAC版本Android Studio和Android SDK Manager

http://developer.android.com/sdk/index.html

3、安装JDK7、Android Studio、Android SDK Manager,并下载需要版本的Android SDK

4、此时双击Android Studio会报错:

Android Studio was unable to find a valid JVM.

这是因为Android Studio默认使用JDK1.6.*的原因

在Finder中打开Application文件夹,在“Android Studio.app”上右键,显示程序包内容
编辑Content/Info.plist,修改属性JVMOptions->JVMVersion一行,从1.6.*修改为1.7.*

5、此时双击Android Studio会尝试从Google获取最新的Android SDK信息

Fetching Android SDK component information

当然你读不到啦,只好屏蔽初始化方法:

编辑Content/idea.properties/bin/idea.properties文件,添加一行
disable.android.first.run=true

6、现在Android Studio可以启动了,但是不能新建项目
需要告诉Android Studio,Android SDK在哪里:

主界面-》Configure-》Project Defaults-》Project Structure-》Android SDK location
填写Android SDK的绝对路径,保存,然后就能新建应用了

7、那就新建一个简单应用测试一下吧,当然先是虚拟机:

Starting emulator for AVD 'new'
emulator: ERROR: x86 emulation currently requires hardware acceleration!
Please ensure Intel HAXM is properly installed and usable.
CPU acceleration status: HAX kernel module is not installed!

这是因为HAXM模块没有安装,前往Android SDK根目录

安装下面的软件:
PATH_TO_SDK/extras/intel/Hardware_Accelerated_Execution_Manager/IntelHAXM_1.1.1_for_10_9_and_above.dmg

8、然后就是在设备上测试,Android SDK找不到我的设备

#添加adb路径
echo "export PATH=${PATH}:/PATH_TO_SDK/android-sdk-macosx/platform-tools/">>~/.bash_profile
#然后刷新一下
source .bash_profile 
#添加设备厂商ID(我的是小米),可以在系统报告中看到设备厂商ID信息
echo"0x2717">>~/.android/adb_usb.ini
#杀掉adb服务
adb kill-server
#重启adb服务
adb sever
#PTP模式连上手机,当然要开启手机调试咯,这样就能看到设备了
adb devices

9、连上设备了,运行时报错:

Permission Denial: starting Intent { act=android.intent.action.MAIN cat=[android.intent.category.LAUNCHER] flg=0x10000000 cmp=com.neohope.testapp/.LoginActivity } from null (pid=29619, uid=2000) not exported from uid 10139

这个错误是因为Main Activity没有设置,在Manifest中对应的Activity中增加如下设置即可

<intent-filter>
    <action android:name="android.intent.action.MAIN" />
    <category android:name="android.intent.category.LAUNCHER" />
</intent-filter>

Wireshark过滤规则

Wireshark是一个很好用的sniffer工具,插件十分强大。有时,为了判断问题,在日常工作中也会用到一些,现在记录一部分常用的过滤规则:

1、运算符

lt < 小于
le <= 小于等于
eq == 等于
gt > 大于
ge >= 大于等于
ne != 不等
and && 与操作
or || 或操作
not ! 非操作

2、ip、mac和port过滤

#过滤ip地址为192.168.1.102的全部数据包
ip.addr eq 192.168.1.102
#过滤从192.168.1.102发来的数据包 或 发往192.168.1.102的数据包
ip.src eq 192.168.1.102 or ip.dst eq 192.168.1.102

#过滤MAC地址为00:00:00:01:02:03的所有包
eth.add eq 00:00:00:01:02:03
#过滤MAC地址为00:00:00:01:02:03发送的包 或 发往00:00:00:01:02:03的包
eth.src eq 00:00:00:01:02:03 or eth.dst eq 00:00:00:01:02:03

#过滤8080端口全部tcp数据包(接收和发送)
tcp.port eq 8080
#过滤8080端口发送的全部tcp数据包 或 发送到8080端口全部tcp数据包
tcp.srcport eq 8080 or tcp.dstport eq 8080

#过滤21端口全部udp数据包(接收和发送)
upd.port eq 21
#过滤21端口发送的全部udp数据包 或 发送到21端口全部udp数据包
udp.srcport eq 21 or udp.dstport eq 21

#过滤1024以内端口数据包
tcp.port <= 1024

#去掉tcp数据包
!tcp
#或
not tcp

3、包长度过滤

#udp报文长度等于100
udp.length eq 100

#udp数据长度等于92
udp.len eq 92

#经常可以用到eth(farme),ip,arp,tcp,udp等协议的包长度
#注意区分包长度和payload长度

4、HTTP协议

#过滤GET方法的HTTP数据包
http.request.method == "GET"

#按URI过滤HTTP数据包
http.request.uri == "/image/logo.gif"

#按内容过滤HTTP数据包
http contains "Content-Type: application/dicom"

5、匹配规则

\d 0-9的数字
\D \d的补集
\w 单词字符,即大小写字母、0-9的数字、下划线
\W \w的补集
\s 空白字符,包括换行符\n、回车符\r、制表符\t、垂直制表符\v、换页符\f
\S \s的补集
. 除换行符\n外的任意字符
.* 匹配任意文本
[…] 匹配[]内所列出的所有字符
[^…] 匹配非[]内所列出的字符
^ 表示其后的字符必须位于字符串的开始处
$ 表示其前面的字符必须位于字符串的结束处
\b 匹配一个单词的边界
\B 匹配一个非单词的边界
{n} 匹配前面的字符n次
{n,} 匹配前面的字符n次或多于n次
{n,m} 匹配前面的字符n到m次
? 匹配前面的字符0或1次
+ 匹配前面的字符1次或多于1次
* 匹配前面的字符0次或式于0次

6、包匹配

#字节匹配,比如匹配payload第一个字节0x14的UDP数据包
udp[8]==14

#范围匹配,经常使用到tcp[offset,n],比如匹配payload的前4个字节0x0004002a
udp[8:4]==00:04:00:2a

#正则匹配,要用match,比如匹配HTTP1.1协议的数据包
tcp[20:] matches "^GET [ -~]*HTTP/1.1\\x0d\\x0a"

7、示例

#DICOMClient 上抓取 DICOMServer
ip.dst eq 服务端ip &&  tcp.port eq 104

#DICOMServer 上抓取 DICOMClient
tcp.port eq 104

#HL7v2Client 上抓取 HL7v2Server
ip.dst eq 服务端ip &&  tcp.port eq 3306

#HL7v2Server上抓取 HL7v2Client 
tcp.port eq 3306

C#命令行打印PDF文档

1、使用Windows命令行
BAT Code

print /D:"\\COMPUTERNAME\打印机名" "PDF文件路径"

C# Code

ProcessStartInfo psInfo = new ProcessStartInfo();
psInfo.Verb = "Print"; // or "PrintTo"
psInfo.FileName = pdfFileName;
psInfo.Arguments = String.Format("/p /h \"{0}\" \"{1}\"", pdfFileName, printerName);
psInfo.WindowStyle = ProcessWindowStyle.Hidden;
psInfo.CreateNoWindow = true;
psInfo.UseShellExecute = true;
Process process = Process.Start(psInfo);

2、使用adobe命令行
BAT Code

#发送到默认打印机
AcroRd32.exe /s /o /h /p “PDF文件路径”
#发送到指定打印机
AcroRd32.exe /s /o /h /t “PDF文件路径” “打印机名” “驱动” “端口”

C# Code

ProcessStartInfo psInfo = new ProcessStartInfo();
psInfo.FileName = @"C:\Program Files\Adobe\Reader 9.0\Reader\AcroRd32.exe";
psInfo.Arguments = String.Format("/s /o /h /p{0}", pdfFileName);
psInfo.WindowStyle = ProcessWindowStyle.Hidden;
psInfo.CreateNoWindow = true;
psInfo.UseShellExecute = true;

Process p= Process.Start(psInfo);
p.WaitForInputIdle();
System.Threading.Thread.Sleep(3000);
if (false == p.CloseMainWindow())p.Kill();

3、使用gostscript命令行
BAT Code

gswin32c.exe -dPrinted -dBATCH -dNOPAUSE -dNOSAFER -q -dNumCopies=1 -sDEVICE=ljet4 -sOutputFile="\\spool\打印机名称" "PDF文件路径"

C# Code

ProcessStartInfo psInfo = new ProcessStartInfo();
psInfo.Arguments = String.Format(" -dPrinted -dBATCH -dNOPAUSE -dNOSAFER -q -dNumCopies=1 -sDEVICE=ljet4 -sOutputFile=\"\\\\spool\\{0}\" \"{1}\"", printerName, pdfFileName);
psInfo.FileName = @"C:\Program Files\gs\gs8.70\bin\gswin32c.exe";
psInfo.UseShellExecute = false;

Process p= Process.Start(psInfo);
p.WaitForInputIdle();
System.Threading.Thread.Sleep(3000);
if (false == p.CloseMainWindow())p.Kill();

4、使用PrintDocument+第三方渲染插件(参考资料3)

PrintDocument pd = new PrintDocument();
pd.DocumentName = pdfName;
pd.PrinterSettings.PrinterName =printerName;
pd.PrinterSettings.PrintFileName = fileName;
pd.PrintController = new StandardPrintController();
pd.OriginAtMargins = false;
pd.PrintPage += new PrintPageEventHandler(this.pd_PrintPage);
pd.Print();

参考资料:
PDF: direct printing with .NET
How to Silently Print PDFs using Adobe Reader and C#
100% .NET component for rendering PDF documents