From 3431e667a5c6475043ebfd97b43a3fdc4b078596 Mon Sep 17 00:00:00 2001 From: Phil Burton Date: Mon, 25 Feb 2019 13:37:59 +0000 Subject: Refactor and clean up notes --- day1/diving-deep-into-blockchain.txt | 100 +++++++++++++++++++++++++++++ day1/first-talk.txt | 105 ------------------------------- day1/massively-scaled-microservices.txt | 108 ++++++++++++++++++++++++++++++++ day1/no-sql.txt | 57 ----------------- day1/non-scalar-data.txt | 59 +++++++++++++++++ day1/second-talk.txt | 97 ---------------------------- day1/supervised-learning.txt | 77 +++++++++++++++++++++++ 7 files changed, 344 insertions(+), 259 deletions(-) create mode 100644 day1/diving-deep-into-blockchain.txt delete mode 100644 day1/first-talk.txt create mode 100644 day1/massively-scaled-microservices.txt delete mode 100644 day1/no-sql.txt create mode 100644 day1/non-scalar-data.txt delete mode 100644 day1/second-talk.txt create mode 100644 day1/supervised-learning.txt (limited to 'day1') diff --git a/day1/diving-deep-into-blockchain.txt b/day1/diving-deep-into-blockchain.txt new file mode 100644 index 0000000..8fe9218 --- /dev/null +++ b/day1/diving-deep-into-blockchain.txt @@ -0,0 +1,100 @@ +Block chain + +Tomasz Kowalczyk +https://joind.in/event/php-uk-conference-2019/dive-deep-into-blockchain + +- Block chain is a database +- Bespoke data structure + +"Not crypto" + +General Purpose block chain: + +Why? +- Data in block is immutable +- Don't edit or remove, you append +- traceability (Auditing / logging) +- Verifiability (Data must be fully valid) +- reproducibility +- decentralisation +- Trust-less (Removes need for trust) +- transparency + +How? +- What type? 
+ - public - will be hacked + - shared - shared ledger + - private +- Cryptography + - pub/private keys + - sign information + - verifiability + - Elliptic Curves + - openssl ecparam -list_curves + - Addressing + - can shorten public keys massively + - Hash algorithm + - Integrity + - 2x sha256 (sha256(sha256($data))) + - 4B version, 32B hash of last Block, 32B hash of data block, 4B timestamp, 4B difficulty, 4B nonce + - Merkle tree - binary hash tree + - Merkle path - list of hashes through the tree + - Genesis block - First block in the chain + - Special as it's first + - Prevents exploitation by being hardcoded + +- consensus + - Confidence + - chain forking needs resolution + - bitcoin uses longest chain wins + - proof of work / proof of stake + - difficulty is number of zeros that need to be at the start of the hash + - others (proof of burn, proof of time, proof of ....) + + +- Smart contracts + - Programs that are included in the chain + - e.g. A loan paid by the chain + +- Lightning network (drawback of blockchain is it focuses on integrity not perf) + - Off chain transactions with on-chain settlement + +- Not all blockchains are chains + - IOTA Tangle + +- Challenges + - Fault tolerance + - You lose control of public/shared blockchains + - No way to enforce software updates + - Conflict resolution + - Integration with current systems + - Security + - Cryptography is first and last line of defence + +- Not perfect for: + - scalability + - performance + - throughput + +- Incentives + - Mining in currencies + - Power for money +- Privacy + - Protect data + +- Hard forks +- 51% attacks + +- Right solution for the right kind of problem + +- Event sourcing on steroids + +QUESTIONS +- Any php recommendations? + - No. +- security? + - Store data not values somewhere else + - Encrypt +- Why private blocks? 
+ - Can give public list of hashes for verification + - keep data private diff --git a/day1/first-talk.txt b/day1/first-talk.txt deleted file mode 100644 index da8689e..0000000 --- a/day1/first-talk.txt +++ /dev/null @@ -1,105 +0,0 @@ -# micro services - perf - -## Overivew - -glu mobile - microservices - -- Use docker -- HTTP processing -- Amazon ECS -- NOSQL Redis + couch base -- 40M votes a day -- 1M active users -- 14K req/m - -## Microservices - -- Small -- Simole -- easy to deploy -- upgradable - -Use Base images -Name by version and never change (Freeze them) - -## background processing -- Execute externally -- Shutdown functions -- Job server -- fastcgi_finish_request - -## HTTP Compression - -- gzip on -- gzip_types -- gzip_proxied any -- gzip_comp_level 5 -- gzip_min_length (default 20) -- Smaller than 1500B means it will always fit in one TCP packet - -If content_length is not set then nginx will compress always - -Laravel/Lume/Others add middleware to add content_length - -## Caching -- HTTP2 Server push -- Repeated server calls HTTP1 -- 304s - - Last-modified - - cache-control - -## No SQL -- Mongo/Redis -- Fight network I/O -- save hdd space -- compress large messages -- serialise better -- message templates -- add ttl expiry -- shorten cache keys - -## Fast caching -- PHP Redis -- Predis - high memory, slower -- Persistent connections available - -## Deployment EC8 -- 0 Downtime -- quick rollout -- ELB health-check -- Auto scaling - -## Hardware upgrades -- Doubling the hardware, halving the # of machines - -## VPC -- Closer together -- Same location -- utilise internal networks -- other thoughts - - Async I/O - - swoole - - HTTP/2 - - Tars - - binary (80% reduction) - -## Managing images -- Don't use aptitude (apt) -- Build from source -- fpm and Nginx together - - Separate not helpful - - No need for TCP/IP overhead -- supervisorD - everywhere - -# Useful terms -aerospike -couch base -php_swoole -supervisorD -new relic -bugsnag -amazon cloud watch 
-sonarqube -sensolabs security checker -fastcgi_finish_request -crowdstar https://github.com/Crowdstar/background-processing diff --git a/day1/massively-scaled-microservices.txt b/day1/massively-scaled-microservices.txt new file mode 100644 index 0000000..3b6dbe0 --- /dev/null +++ b/day1/massively-scaled-microservices.txt @@ -0,0 +1,108 @@ +# micro services - perf + +https://joind.in/event/php-uk-conference-2019/massively-scaled-high-performance-web-services-with-php +https://www.slideshare.net/DeminYin/massively-scaled-high-performance-web-services-with-php-132696547 + +## Overview + +glu mobile - microservices + +- Use docker +- HTTP processing +- Amazon ECS +- NOSQL Redis + couch base +- 40M votes a day +- 1M active users +- 14K req/m + +## Microservices + +- Small +- Simple +- easy to deploy +- upgradable + +Use Base images +Name by version and never change (Freeze them) + +## background processing +- Execute externally +- Shutdown functions +- Job server +- fastcgi_finish_request + +## HTTP Compression + +- gzip on +- gzip_types +- gzip_proxied any +- gzip_comp_level 5 +- gzip_min_length (default 20) +- Smaller than 1500B means it will always fit in one TCP packet + +If content_length is not set then nginx will compress always + +Laravel/Lumen/Others add middleware to add content_length + +## Caching +- HTTP2 Server push +- Repeated server calls HTTP1 +- 304s + - Last-modified + - cache-control + +## No SQL +- Mongo/Redis +- Fight network I/O +- save hdd space +- compress large messages +- serialise better +- message templates +- add ttl expiry +- shorten cache keys + +## Fast caching +- PHP Redis +- Predis - high memory, slower +- Persistent connections available + +## Deployment EC8 +- 0 Downtime +- quick rollout +- ELB health-check +- Auto scaling + +## Hardware upgrades +- Doubling the hardware, halving the # of machines + +## VPC +- Closer together +- Same location +- utilise internal networks +- other thoughts + - Async I/O + - swoole + - HTTP/2 + - Tars 
+ - binary (80% reduction) + +## Managing images +- Don't use aptitude (apt) +- Build from source +- fpm and Nginx together + - Separate not helpful + - No need for TCP/IP overhead +- supervisorD - everywhere + +# Useful terms +aerospike +couch base +php_swoole +supervisorD +new relic +bugsnag +amazon cloud watch +sonarqube +sensiolabs security checker +fastcgi_finish_request +crowdstar https://github.com/Crowdstar/background-processing diff --git a/day1/no-sql.txt b/day1/no-sql.txt deleted file mode 100644 index ccb6a4a..0000000 --- a/day1/no-sql.txt +++ /dev/null @@ -1,57 +0,0 @@ -# Non-Scalar Data - -## Redis sets SSAD - -- key binding - safe string -- sets of data -- taggable - -## Document data stores -- Usually JSON -- Richer -- MongoDB - single master -- CouchDB - multi master -- ES - text search index - -## MongoDB -- pecl/mongodb -- mongodb/mongodb - -docs are PHP objects or assoc arrays - -## Relational Database -- normalisation - -- postgres - - HSTORE - - JSON/JSONB - - -# Querying data - -Sets - SISMEMBER and SMEMBER -- Aggregations -- buckets -- postgres - non standard - -## Redis -- Can add to sets -- Atomic -- fast -- counters - -DONT: RETRIVE MANIPULATE STORE - - Do we do this at d3R? 
- - redis bad at scaling - - joned.in/25887 - - -# Useful links -https://redis.io/commands/sadd -https://www.postgresql.org/docs/9.1/hstore.html -https://wiki.openstreetmap.org/wiki/PostgreSQL -https://en.m.wikipedia.org/wiki/Benford%27s_law -https://php-ml.readthedocs.io/en/latest/machine-learning/workflow/pipeline/ diff --git a/day1/non-scalar-data.txt b/day1/non-scalar-data.txt new file mode 100644 index 0000000..908cf84 --- /dev/null +++ b/day1/non-scalar-data.txt @@ -0,0 +1,59 @@ +# Non-Scalar Data +https://joind.in/event/php-uk-conference-2019/storing-non-scalar-data +Derick Rethans + +## Redis sets SADD + +- key binding - safe string +- sets of data +- taggable + +## Document data stores +- Usually JSON +- Richer +- MongoDB - single master +- CouchDB - multi master +- ES - text search index + +## MongoDB +- pecl/mongodb +- mongodb/mongodb + +docs are PHP objects or assoc arrays + +## Relational Database +- normalisation + +- postgres + - HSTORE + - JSON/JSONB + + +# Querying data + +Sets - SISMEMBER and SMEMBERS +- Aggregations +- buckets +- postgres - non standard + +## Redis +- Can add to sets +- Atomic +- fast +- counters + +DONT: RETRIEVE MANIPULATE STORE + + Do we do this at d3R? + + redis bad at scaling + + joind.in/25887 + + +# Useful links +https://redis.io/commands/sadd +https://www.postgresql.org/docs/9.1/hstore.html +https://wiki.openstreetmap.org/wiki/PostgreSQL +https://en.m.wikipedia.org/wiki/Benford%27s_law +https://php-ml.readthedocs.io/en/latest/machine-learning/workflow/pipeline/ diff --git a/day1/second-talk.txt b/day1/second-talk.txt deleted file mode 100644 index 46df43f..0000000 --- a/day1/second-talk.txt +++ /dev/null @@ -1,97 +0,0 @@ -Block chain - -- Block chain is a database -- Bespoke data structure - -"Not crpyto" - -General Purpose block chain: - -Why? 
-- Data in block is immutable -- Don't edit or remove, you append -- traceability (Auditing / logging) -- Verifiability (Data must be fully valid) -- reproducibility -- decentralisation -- Trust-less (Removes need for trust) -- transparency - -How? -- What type? - - public - will be hacked - - shared - shared ledger - - private -- Cryptography - - pub/rivate keys - - sign information - - verifablity - - Elliptic Curves - - openssl ecparam -list_curves - - Addressing - - can shorten public keys massively - - Hash algorithm - - Integrity - - 2x sha256 (sha256(sha256($data))) - - 4B version, 32B hash of last Block, 32B hash of data block, 4B timestamp, 4B difficulty, 4B nonce - - Merkle tree - binary hash tree - - Merkle path - list of hashes through the tree - - Genesis block - First block in the chain - - Special as it's first - - Prevents exploitation by being hardcoded - -- consensus - - Confidence - - chain forking needs resolution - - bitcoin users longest chain wins - - proof of work / proof of stake - - difficulty is no of zeros that need to be at the start of the hash - - others (proof of burn, proof of time, proof of ....) - - -- Smart contracts - - Programs that are included in the chain - - e.g. A loan paid by the chain - -- Lighting network (drawback off blockhain is it focuses on integrity not perf) - - Off chain transactions with on-chain settlement - -- Not all blockchains are chains - - IOTA Tangle - -- Challenges - - Fault tolerance - - You lose control of public/shared blockchains - - Now way to enforce software updates - - Conflict resolution - - Integration with current systems - - Security - - Cryptography is first and last line of defence - -- Not perfect for: - - scalability - - performance - - throughput - -- Incentives - - Mining in currencies - - Power for money -- Privacy - - Protect data - -- Hard forks -- 51% attacks - -- Right solution for the right kind of problem - -- Event sourcing on steroids - -QUESTIONS -- Any php recommendations? 
- - No. -- security? - - Store data not values somewhere else - - Encrypt -- Why private blocks? - - Can give public list of hashes for verification - - keep data private diff --git a/day1/supervised-learning.txt b/day1/supervised-learning.txt new file mode 100644 index 0000000..1dce7da --- /dev/null +++ b/day1/supervised-learning.txt @@ -0,0 +1,77 @@ +# Learning: the hows and whys of machine learning + +Liam Wiltshire +https://liam-wiltshire.github.io/talks/?talk=machinelearning&conference=phpuk +https://joind.in/event/php-uk-conference-2019/learning-the-hows-and-whys-of-machine-learning + +## Overview + +Charge backs + +## Supervised learning +Training data +Learning functions +Categorisation / Classification +Regression - Where do we sit on a line + +## Naive Bayes classifier +Standardise words +- Un pluralise +- Un gender +- Un tense +- etc + +More data == better + +## Tokenisation +https://en.wikipedia.org/wiki/Benford%27s_law +https://php-ml.readthedocs.io + +Unique tokens for each unique context + +## Imbalanced data +One category has more data +99% data not charge back +Just being accurate, not very helpful + - Started by flagging 100% as fine. + - Need to collect more data, change methods, resample data + +## Understand data +- context +- Common data vs specific data +- Continuous vs discrete data + +## KNN +K Nearest Neighbours +https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm + - Distances + - less sensitive to imbalance + - Keep K odd (no draws) + +## Handling nominal data + +Binary +- Increase amounts of dimensions +- normalisation required +- equal scales + +## Contextless data is meaningless +Is it normal? 
+ +## Next to try +Weighting +Different dimensions +Change K value (was 3NN) +Remove outliers +Diff distance function +weighted distance + + + + +# Useful links +https://en.wikipedia.org/wiki/Benford%27s_law +https://php-ml.readthedocs.io +https://liam-wiltshire.github.io/talks/?talk=machinelearning&conference=phpuk +https://joind.in/event/php-uk-conference-2019/learning-the-hows-and-whys-of-machine-learning +https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm -- cgit v1.2.3