Julia DataFrame:创建新列,按 :y 分组对 :x 列的计数求和

时间:2017-05-16 17:08:59

标签: dataframe julia

我有一个记录 :x 和 :y 出现情况的 DataFrame。我想计算每种 (:x, :y) 组合在 DataFrame 中出现的频率,以及该组合占对应 :y 出现次数的百分比。第一部分我现在已经完成了,这要感谢之前的一个问题(a previous question)。

using DataFrames
mydf = DataFrame(y = rand('a':'h', 1000), x = rand('i':'p', 1000))
mydfsum = by(mydf, [:x, :y], df -> DataFrame(n = length(df[:x])))

这会成功创建一个列,用于统计 :x 的每个值与 :y 的每个值同时出现的次数。现在,我需要再生成一个新列,用于统计 :y 的每个值出现的次数。接下来我可以这样做:

创建一个新的 DataFrame:

mydfsumy = by(mydf, [:y], df -> DataFrame(ny = length(df[:x])))

连接(join)这两个 DataFrame:

mydfsum = join(mydfsum, mydfsumy, on = :y)

并创建百分比列 :yp:

mydfsum[:yp] = mydfsum[:n] ./ mydfsum[:ny]

但对于常见的数据管理问题,这似乎是一个笨重的解决方法。在 R 中,我会使用 dplyr 在一行中完成所有这些操作:

mydf %>% group_by(x, y) %>% summarize(n = n()) %>% group_by(y) %>% mutate(yp = n/sum(n))
/**
 * Leaflet-based map that plots cities loaded from a JSON file and shows
 * per-city details in a sidebar.
 *
 * Construction options read by this class:
 *   - mapDivId       id of the DOM element that hosts the map
 *   - mapOptions     options forwarded to L.map (center / zoom are also
 *                    forwarded to cartodb.createVis)
 *   - cartoDbOptions optional; when present the map is created through
 *                    cartodb.createVis({vizURL, vizOptions}) and falls back
 *                    to a plain Leaflet map if that fails
 *   - dataUrl        URL of the JSON array describing the cities
 *
 * External globals used: L (Leaflet), cartodb, Mustache.
 */
var InteractiveMap = L.Class.extend({

options: {
    // Icon used for cities flagged as active.
    'redIcon' : L.icon({
        iconUrl: 'img/leaflet-color-markers/marker-icon-red.png',
        // NOTE(review): the grey icon below uses 'marker-icon-2x-grey.png';
        // confirm that 'marker-2x-red.png' (without "icon-") really exists.
        iconRetinaUrl: 'img/leaflet-color-markers/marker-2x-red.png',
        iconSize: [25, 41],
        iconAnchor: [12, 41],
        popupAnchor: [1, -34],
        shadowSize: [41, 41]
        //shadowUrl: 'my-icon-shadow.png',
        //shadowRetinaUrl: 'my-icon-shadow@2x.png',
        //shadowAnchor: [22, 94]
    }),
    // Icon used for cities that are not active.
    'greyIcon' : L.icon({
        iconUrl: 'img/leaflet-color-markers/marker-icon-grey.png',
        iconRetinaUrl: 'img/leaflet-color-markers/marker-icon-2x-grey.png',
        iconSize: [25, 41],
        iconAnchor: [12, 41],
        popupAnchor: [1, -34],
        shadowSize: [41, 41]
        //shadowUrl: 'my-icon-shadow.png',
        //shadowRetinaUrl: 'my-icon-shadow@2x.png',
        //shadowAnchor: [22, 94]
    })
},

/**
 * Merges the given options into this.options and starts map creation,
 * through CartoDB when cartoDbOptions is supplied, otherwise directly.
 * @param {Object} options see the class comment
 */
initialize: function(options) {
    L.setOptions(this, options);
    // Loose comparison on purpose: matches both null and undefined.
    if (null != this.options.cartoDbOptions) {
        this.initMapAsCartoDbViz();
    } else {
        this.initMapInUsualWay();
    }
},

/**
 * Creates a plain Leaflet map with the base tile layer and a scale
 * control, then finishes initialization.
 */
initMapInUsualWay: function() {
    this._map = L.map(this.options.mapDivId, this.options.mapOptions);

    this.initBaseMapLayers();

    L.control.scale( { position : 'bottomright' } ).addTo(this._map);
    this.completeInit();
},

/**
 * Creates the map through cartodb.createVis. On success the native map of
 * the visualization becomes this._map; on error the error is logged and
 * the plain Leaflet initialization is used instead.
 */
initMapAsCartoDbViz: function() {
    var self = this;
    var mapOptions = this.options.mapOptions;
    var cartoDbOptions = this.options.cartoDbOptions;

    // vizOptions are applied last, so they can override center and zoom.
    var visOptions = L.extend({
        center_lat: mapOptions.center.lat,
        center_lon: mapOptions.center.lng,
        zoom: mapOptions.zoom
    }, cartoDbOptions.vizOptions);

    cartodb.createVis(this.options.mapDivId, cartoDbOptions.vizURL, visOptions)
    .done(function (vis, layers) {
        // layer 0 is the base layer, layer 1 is cartodb layer
        // setInteraction is disabled by default
        self._map = vis.getNativeMap();
        self.completeInit();
    })
    .error(function (err) {
        console.log(err);
        self.initMapInUsualWay();
    });
},

/**
 * Builds the base tile layers, stores them in this._baseMaps keyed by
 * name, and adds the one flagged as default to the map.
 */
initBaseMapLayers: function() {
    var baseMapLayerDescription = [
        { 'id' : "Carto", 'name' : 'Carto', 'default' : true,
                init : function() {
                    return L.tileLayer('https://cartodb-basemaps-{s}.global.ssl.fastly.net/light_all/{z}/{x}/{y}.png', {
                        maxZoom: 18, attribution: '&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>, &copy;<a href="https://carto.com/attribution">CARTO</a>'
                    });
                }
        }
    ];
    this._baseMaps = {};
    // Loop variables are declared locally; they used to leak as globals.
    for (var i = 0, count = baseMapLayerDescription.length; i < count; i++) {
        var description = baseMapLayerDescription[i];
        var tileLayer = description.init();
        this._baseMaps[description.name] = tileLayer;
        if (description['default']) {
            tileLayer.addTo(this._map);
        }
    }
},

/**
 * Runs the steps shared by both initialization paths once this._map
 * exists: custom controls, the city data request, and the sidebar.
 */
completeInit: function() {
    this.initCustomControlsOnMap();
    this._getJSON(this.options.dataUrl, this.addJsonToMap, this);
    this.initSideBar();
},

/**
 * Placeholder for extra map controls; currently adds nothing.
 */
initCustomControlsOnMap: function() {
    //controlLayers = L.control.layers(this._baseMaps, this._overlayLayers);
    //controlLayers.addTo(this._map);
},

/**
 * Reads and pre-parses the Mustache template from the element with id
 * 'sideMenuTemplate' and adds a closable right-hand sidebar control bound
 * to the element with id 'sidebar'.
 */
initSideBar: function() {
    this._sideMenuTemplate = document.getElementById('sideMenuTemplate').innerText;
    Mustache.parse(this._sideMenuTemplate);

    this._sidebar = L.control.sidebar('sidebar', {
        closeButton: true,
        position: 'right'
    });
    this._map.addControl(this._sidebar);
},

/**
 * Adds one marker per city to the map and connects the active cities with
 * a dashed polyline, in the order they appear in `data`.
 *
 * Fields read from each entry: `location` (a "lat,lng" string), `active`,
 * `hotels`, `tours`. Active cities get a red marker that renders the
 * sidebar template with the city's data when clicked; the other cities
 * get a grey marker with a hint text, and clicking it hides the sidebar.
 *
 * @param {Array<Object>} data city descriptions
 */
addJsonToMap: function(data) {
    // Handlers use the owning instance rather than a page-level global, so
    // the class works regardless of the variable the instance is stored in.
    var self = this;
    var lineLatLngs = [];

    var showCityDetails = function () {
        // Registered without an explicit context; relies on Leaflet calling
        // the listener with the marker as `this`.
        //self._sidebar.setContent('');
        self._sidebar.getContainer().scrollTop = 0;
        var sideMenuContext = Mustache.render(self._sideMenuTemplate, this.cityDetails);
        self._sidebar.setContent(sideMenuContext);
        self._sidebar.show();
    };
    var hideSidebar = function () {
        self._sidebar.hide();
    };

    for (var i = 0, cityCount = data.length; i < cityCount; i++) {
        var city = data[i],
            latLng = city.location.split(','),
            marker;
        if (city.active) {
            marker = L.marker(latLng, { icon : this.options.redIcon });
            lineLatLngs.push(marker.getLatLng());
            marker.cityDetails = city;
            // Flags added to the city object for use when rendering the template.
            marker.cityDetails.showHotels = !(null == city.hotels);
            marker.cityDetails.showTours = !(null == city.tours);
            marker.on('click', showCityDetails);
        } else {
            if ('0' === L.version.substr(0, 1)) { // v0.7.x does not implement marker.bindTooltip
                marker = L.marker(latLng, {
                    icon : this.options.greyIcon,
                    title : 'We haven\'t visited this city yet! Check back next week for new cities.'
                });
            } else {
                marker = L.marker(latLng, { icon : this.options.greyIcon });
                marker.bindTooltip('We haven\'t visited this city yet!<br/>Check back next week for new cities.');
            }
            marker.on('click', hideSidebar);
        }
        marker.addTo(this._map);
    }
    L.polyline(lineLatLngs, {dashArray: "5, 5"}).addTo(this._map);
},

/**
 * Asynchronously GETs `url`, parses the response body as JSON and passes
 * the result to `callback`, invoked with `callbackContext` as `this`.
 * Failed requests, unusable responses and malformed JSON are logged to
 * the console and the callback is not invoked.
 *
 * @param {string} url resource to fetch
 * @param {Function} callback receives the parsed JSON value
 * @param {Object} callbackContext value of `this` inside `callback`
 */
_getJSON: function(url, callback, callbackContext) {
    var xhr = new XMLHttpRequest();
    xhr.open('GET', url, true);
    //xhr.responseType = 'json';
    //xhr.setRequestHeader('Accept', 'application/json');
    xhr.onload = function () {
        var isUsable = xhr.readyState === 4 && xhr.status >= 200 && xhr.status <= 304 && xhr.response;
        if (!isUsable) {
            console.log('getJSON error', xhr);
            return;
        }
        var parsed;
        // Only the parse is guarded, so errors thrown by the callback itself
        // are not misreported as JSON problems.
        try {
            parsed = JSON.parse(xhr.response);
        } catch (parseError) {
            console.log('getJSON error', parseError, xhr);
            return;
        }
        callback.call(callbackContext, parsed);
    };
    // Network-level failures never reach onload; log them as well.
    xhr.onerror = function () {
        console.log('getJSON error', xhr);
    };
    xhr.send();
}
});


// Page-level map instance: a fixed (non-draggable, non-zoomable) view
// created through the CartoDB visualization below, with city markers
// loaded from dataUrl.
var interactiveMap = new InteractiveMap( {
    mapDivId : 'map',
    mapOptions : {
        center: L.latLng(37.19533058280065, -98.87695312500001),
        zoom: 4,
        zoomControl : false,
        dragging : false,
        doubleClickZoom : false,
        scrollWheelZoom : false
    },

    cartoDbOptions : {
        vizURL : 'http://telegul.carto.com/api/v2/viz/b167e126-38e5-11e7-ba84-0e3ebc282e83/viz.json', // <-- INSERT {YOUR vizjson_url}
        // Each key appears once; layer_selector used to be listed twice.
        vizOptions : {
            shareable: false,
            title: false,
            description: false,
            search: false,
            zoomControl: false,
            cartodb_logo : false,
            tiles_loader: true,
            infowindow: false,
            layer_selector: false,
            scrollwheel: false,
            legends: false
        }
    },
    dataUrl : 'data/ExperienceDomesticMap.json'
} );

1 个答案:

答案 0 :(得分:2)

您可以在一行中完成:

mydfsum = by(mydf, :y, df -> by(df, :x, dd -> DataFrame(n = size(dd,1), yp = size(dd,1)/size(df,1))))

或者,如果难以阅读,您可以对匿名函数使用do表示法:

mydfsum = by(mydf, :y) do df
    by(df, :x) do dd
        DataFrame(n = size(dd,1), yp = size(dd,1)/size(df,1))
    end
end

您在 R 中所做的,实际上是先按 x 和 y 做一次 by,然后修改(mutate)输出中的一列。您也可以这样做,但需要先创建该列。在这里,我首先用零初始化 yp 列,然后使用另一个 by 对其进行修改。

mydfsum = by(mydf,[:x,:y], df -> DataFrame(n = size(df,1), yp = 0.))
by(mydfsum, :y, df -> (df[:yp] = df[:n]/sum(df[:n])))

对于更高级的数据操作,您可能需要查看Query.jl